{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e51cd3d8-2474-489b-ba3a-f3aaeb829024",
   "metadata": {},
   "source": [
    "# 导包并定义函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9fcc4b7a-04e2-41b9-b5cd-f032e794c895",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T10:14:21.983232Z",
     "iopub.status.busy": "2024-11-11T10:14:21.982708Z",
     "iopub.status.idle": "2024-11-11T10:14:25.063953Z",
     "msg_id": "33103876-2d42-491d-87e8-b512f280c90f",
     "shell.execute_reply": "2024-11-11T10:14:25.063167Z",
     "shell.execute_reply.started": "2024-11-11T10:14:21.983198Z"
    }
   },
   "outputs": [],
   "source": [
    "# Run the shared setup notebook (its file name indicates it holds the imports and helper definitions).\n",
    "# The explicit %run magic is used so the cell does not depend on IPython's automagic setting.\n",
    "%run B榜复现_导包并定义函数.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "id": "f2ee06e1-e874-4eff-8c18-7b79b4a8482c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T01:29:00.129744Z",
     "iopub.status.busy": "2024-11-11T01:29:00.129262Z",
     "iopub.status.idle": "2024-11-11T01:29:00.489196Z",
     "msg_id": "542a383e-1ced-42bb-8858-94b24cc5c0dc",
     "shell.execute_reply": "2024-11-11T01:29:00.488420Z",
     "shell.execute_reply.started": "2024-11-11T01:29:00.129712Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import shutil\n",
    "\n",
    "# Start from a clean scratch directory. rmtree is portable, unlike shelling out to `rm -rf`;\n",
    "# ignore_errors=True keeps the cell silent when the directory does not exist yet.\n",
    "shutil.rmtree('tmp', ignore_errors=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "5e73dfda-74f3-4748-b546-0fbd0ff58e19",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T01:29:02.206918Z",
     "iopub.status.busy": "2024-11-11T01:29:02.206392Z",
     "iopub.status.idle": "2024-11-11T01:29:02.211050Z",
     "msg_id": "709ddbca-dbcf-4ff0-b521-c5373c9a8c31",
     "shell.execute_reply": "2024-11-11T01:29:02.210374Z",
     "shell.execute_reply.started": "2024-11-11T01:29:02.206880Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# exist_ok=True keeps this cell re-runnable when the scratch directory is already present.\n",
    "os.makedirs(\"tmp\", exist_ok=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f268dd26-967a-46e8-8111-1c3108529c43",
   "metadata": {},
   "source": [
    "# 数据读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "3bdbc853-e9f6-4572-be55-955f40f20a1f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:17:30.021715Z",
     "iopub.status.busy": "2024-11-08T03:17:30.021368Z",
     "iopub.status.idle": "2024-11-08T03:17:30.344789Z",
     "msg_id": "83bc6c79-2904-4f4f-ba12-f4dbd3bdb17b",
     "shell.execute_reply": "2024-11-08T03:17:30.344093Z",
     "shell.execute_reply.started": "2024-11-08T03:17:30.021687Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 12)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>数据日期</th>\n",
       "      <th>客户编号</th>\n",
       "      <th>经营期限至</th>\n",
       "      <th>经营期限自</th>\n",
       "      <th>经营状态</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>成立日期</th>\n",
       "      <th>法定代表人/负责人/执行事务合伙人</th>\n",
       "      <th>企业（机构）类型编码</th>\n",
       "      <th>所在省份编码</th>\n",
       "      <th>国民经济行业代码</th>\n",
       "      <th>is_train</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20020727</td>\n",
       "      <td>182d6a854532dd26a1b111e77bd501f4</td>\n",
       "      <td>20420701</td>\n",
       "      <td>19920702</td>\n",
       "      <td>在营（开业）</td>\n",
       "      <td>275.77</td>\n",
       "      <td>19920702</td>\n",
       "      <td>444360f253c09f7d97a3b15bb26a8573</td>\n",
       "      <td>46f6ddc7a540fa9e2b5ac3fa24038304</td>\n",
       "      <td>3048e339d9689928fc83eb6aa552ccfb</td>\n",
       "      <td>1407ccc0c9f66ff0402271dada75885e</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20020727</td>\n",
       "      <td>f60def7aa5dc124ddae552b7bf5c7675</td>\n",
       "      <td>长期</td>\n",
       "      <td>19930512</td>\n",
       "      <td>在营（开业）</td>\n",
       "      <td>218.88</td>\n",
       "      <td>19930512</td>\n",
       "      <td>0dbf92ddf037b7727060924e76284d1c</td>\n",
       "      <td>46f6ddc7a540fa9e2b5ac3fa24038304</td>\n",
       "      <td>181b1987746f41b780200a407686ffc5</td>\n",
       "      <td>d5f34ccd26f45e66747884462c45c309</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       数据日期                              客户编号     经营期限至     经营期限自    经营状态  \\\n",
       "0  20020727  182d6a854532dd26a1b111e77bd501f4  20420701  19920702  在营（开业）   \n",
       "1  20020727  f60def7aa5dc124ddae552b7bf5c7675        长期  19930512  在营（开业）   \n",
       "\n",
       "     注册资本      成立日期                 法定代表人/负责人/执行事务合伙人  \\\n",
       "0  275.77  19920702  444360f253c09f7d97a3b15bb26a8573   \n",
       "1  218.88  19930512  0dbf92ddf037b7727060924e76284d1c   \n",
       "\n",
       "                         企业（机构）类型编码                            所在省份编码  \\\n",
       "0  46f6ddc7a540fa9e2b5ac3fa24038304  3048e339d9689928fc83eb6aa552ccfb   \n",
       "1  46f6ddc7a540fa9e2b5ac3fa24038304  181b1987746f41b780200a407686ffc5   \n",
       "\n",
       "                           国民经济行业代码  is_train  \n",
       "0  1407ccc0c9f66ff0402271dada75885e         1  \n",
       "1  d5f34ccd26f45e66747884462c45c309         1  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the 'XW_ENTINFO_BASIC' table through get_data, then show its shape and first two rows.\n",
    "file_name = 'XW_ENTINFO_BASIC'\n",
    "BASIC = get_data(file_name, num_rows=None)\n",
    "print(BASIC.shape)\n",
    "BASIC.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "eac62717-8b22-4bcc-8f24-24e1457afdea",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:17:30.346533Z",
     "iopub.status.busy": "2024-11-08T03:17:30.346026Z",
     "iopub.status.idle": "2024-11-08T03:17:30.421855Z",
     "msg_id": "f2730eb9-642d-4dc4-8968-f57942094b4c",
     "shell.execute_reply": "2024-11-08T03:17:30.421214Z",
     "shell.execute_reply.started": "2024-11-08T03:17:30.346505Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 4)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>数据日期</th>\n",
       "      <th>客户编号</th>\n",
       "      <th>FLAG</th>\n",
       "      <th>is_train</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20030728</td>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20030728</td>\n",
       "      <td>b1d244a25a82adb7beafe33fe971402c</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       数据日期                              客户编号  FLAG  is_train\n",
       "0  20030728  158a8d99bec2a2b652a6de45a2b52ec9   0.0         1\n",
       "1  20030728  b1d244a25a82adb7beafe33fe971402c   0.0         1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the 'XW_ENTINFO_TARGET' table through get_data, then show its shape and first two rows.\n",
    "file_name = 'XW_ENTINFO_TARGET'\n",
    "TARGET = get_data(file_name, num_rows=None)\n",
    "print(TARGET.shape)\n",
    "TARGET.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3671cde4-6610-4109-a59a-71f0c1234433",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:17:32.942215Z",
     "iopub.status.busy": "2024-11-08T03:17:32.941746Z",
     "iopub.status.idle": "2024-11-08T03:17:32.950457Z",
     "msg_id": "f455fd09-65a0-410d-a131-f234e6923faa",
     "shell.execute_reply": "2024-11-08T03:17:32.949791Z",
     "shell.execute_reply.started": "2024-11-08T03:17:32.942183Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    53094\n",
       "0     6022\n",
       "Name: is_train, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many target rows carry each is_train value.\n",
    "TARGET['is_train'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0476de3a-f075-43ba-a648-c5a27aeb640c",
   "metadata": {},
   "source": [
    "# 基本信息表业务特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "4e32e624-0e8b-413e-920c-070877754e86",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:20:07.347357Z",
     "iopub.status.busy": "2024-11-08T03:20:07.346615Z",
     "iopub.status.idle": "2024-11-08T03:20:07.380246Z",
     "msg_id": "1cce7494-ac4b-4c8f-be56-3f507d2bca8f",
     "shell.execute_reply": "2024-11-08T03:20:07.379424Z",
     "shell.execute_reply.started": "2024-11-08T03:20:07.347322Z"
    }
   },
   "outputs": [],
   "source": [
    "def BASIC_info():\n",
    "    \"\"\"Build the business features derived from the enterprise basic-info table.\n",
    "\n",
    "    Loads 'XW_ENTINFO_BASIC' (and 'XW_ENTINFO_TARGET' for the supervised binning\n",
    "    step) through get_data, then derives indicator flags, date-span features\n",
    "    (elements 0/1/2 of two_date_dis stored under the _天/_月/_年 suffixes),\n",
    "    frequency flags for the categorical codes, toad-binned codes and\n",
    "    per-legal-representative aggregates.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with '客户编号', the rescaled '注册资本' and the derived feature\n",
    "        columns; the raw date and categorical source columns are dropped.\n",
    "    \"\"\"\n",
    "    file_name = 'XW_ENTINFO_BASIC'\n",
    "    BASIC = get_data(file_name, num_rows=None)\n",
    "    # Positional rename; the legal-representative column gets the short name '法定代表人'.\n",
    "    BASIC.columns = ['数据日期', '客户编号', '经营期限至', '经营期限自', '经营状态', '注册资本', '成立日期','法定代表人', '企业（机构）类型编码', '所在省份编码', '国民经济行业代码', 'is_train']\n",
    "    data = BASIC.copy()\n",
    "\n",
    "    # Indicator flags on the raw values (computed before '注册资本' is rescaled further down).\n",
    "    data['是否长期经营'] = np.where((data['经营期限至'] == '长期'), 1, 0)\n",
    "    data['经营成立时间是否相等'] = np.where((data['经营期限自'] == data['成立日期']), 1, 0)\n",
    "    data['注册资金过小'] = np.where((data['注册资本'] <= 47.16), 1, 0)\n",
    "    # Operating-term end: map the open-ended value '长期' to 29991231 so the column becomes an integer date.\n",
    "    data['经营期限至'] = data['经营期限至'].apply(lambda x: 29991231 if x == '长期' else x).astype(int)\n",
    "    # NOTE(review): expiry is tested against the fixed date 20020727 rather than each row's '数据日期' - confirm this is intended.\n",
    "    data['经营是否已过期'] = data['经营期限至'].apply(lambda x: 1 if x < 20020727 else 0)\n",
    "\n",
    "    def date_spans(first, second):\n",
    "        # One two_date_dis call per row instead of one per output column.\n",
    "        # .iloc keeps the access positional; plain row[0] on a label-indexed Series is deprecated in pandas 2.x.\n",
    "        return data[[first, second]].apply(lambda row: two_date_dis(row.iloc[0], row.iloc[1]), axis=1)\n",
    "\n",
    "    # (feature prefix, first argument column, second argument column) passed to two_date_dis.\n",
    "    span_specs = [\n",
    "        ('剩余经营天数', '经营期限至', '数据日期'),\n",
    "        ('已经营天数', '数据日期', '经营期限自'),\n",
    "        ('当期经营期限总天数', '经营期限至', '经营期限自'),\n",
    "        ('自成立经营期限总天数', '经营期限至', '成立日期'),\n",
    "        ('已成立天数', '数据日期', '成立日期'),\n",
    "        ('再次经营', '经营期限自', '成立日期'),\n",
    "    ]\n",
    "    spans = {prefix: date_spans(first, second) for prefix, first, second in span_specs}\n",
    "    # Element 0/1/2 of each result feeds the _天/_月/_年 column; the unit is the outer loop\n",
    "    # so the columns are appended in the same order as before (all _天, then _月, then _年).\n",
    "    for position, unit in enumerate(['天', '月', '年']):\n",
    "        for prefix, _, _ in span_specs:\n",
    "            data[prefix + '_' + unit] = spans[prefix].map(lambda parts, position=position: parts[position])\n",
    "\n",
    "    # Operating status: per the original note, the training-set bad rate ranks moved-out > operating > cancelled/revoked.\n",
    "    ENTSTATUS_dict = {'迁出':0, '在营（开业）':1, '注销':2, '吊销，未注销':3}\n",
    "    data['经营状态_编码'] = data['经营状态'].map(ENTSTATUS_dict)\n",
    "    # Registered capital: rescale as (value / 3.12) ** 3, rounded to two decimals.\n",
    "    data['注册资本'] = pow((data['注册资本'])/3.12,3).round(2)\n",
    "\n",
    "    def membership_flag(column, values):\n",
    "        # 1 where the row's code is one of `values`, else 0 (vectorised form of a per-row `in` test).\n",
    "        return data[column].isin(values).astype('int64')\n",
    "\n",
    "    def frequency_flags(column, head_n, tail_n):\n",
    "        # Flag codes that fall in the head_n most frequent / tail_n least frequent values of the column.\n",
    "        counts = data[column].value_counts()\n",
    "        data[column + '_频数是否前' + str(head_n)] = membership_flag(column, counts.head(head_n).index.tolist())\n",
    "        data[column + '_频数是否后' + str(tail_n)] = membership_flag(column, counts.tail(tail_n).index.tolist())\n",
    "\n",
    "    # Enterprise type code\n",
    "    frequency_flags('企业（机构）类型编码', 10, 20)\n",
    "    data['企业（机构）类型编码_是否频数最高2类'] = membership_flag('企业（机构）类型编码', ['46f6ddc7a540fa9e2b5ac3fa24038304', 'd6c937931560c340740515da55cfabb5'])\n",
    "    # Province code\n",
    "    frequency_flags('所在省份编码', 5, 5)\n",
    "    # NOTE(review): this flag is computed from '所在省份编码' although its name starts with '企业（机构）类型编码';\n",
    "    # the name is kept unchanged so the returned columns stay the same.\n",
    "    data['企业（机构）类型编码_是否坏率最高2类'] = membership_flag('所在省份编码', ['c3e3dd960e9608c4fc446fe4de09943a', '787f623759f116bd7c5ffdee4bed4a02'])\n",
    "    # National economy industry code\n",
    "    frequency_flags('国民经济行业代码', 5, 230)\n",
    "\n",
    "    # Binning (per the original note: not used in the end because of data leakage).\n",
    "    data['企业（机构）类型编码_分箱'] = data['企业（机构）类型编码']\n",
    "    data['所在省份编码_分箱'] = data['所在省份编码']\n",
    "    data['国民经济行业代码_分箱'] = data['国民经济行业代码']\n",
    "\n",
    "    file_name = 'XW_ENTINFO_TARGET'\n",
    "    TARGET = get_data(file_name, num_rows=None)\n",
    "    TARGET_train = TARGET[TARGET['is_train'] == 1]\n",
    "    BASIC_train = data[data['is_train'] == 1]\n",
    "    BASE_train_TARGET = TARGET_train.drop(['is_train', '数据日期'], axis = 1).merge(BASIC_train[['客户编号', '企业（机构）类型编码_分箱', '所在省份编码_分箱', '国民经济行业代码_分箱']], on = '客户编号', how = 'inner')\n",
    "\n",
    "    BASE_train_TARGET = BASE_train_TARGET.drop(['客户编号'], axis = 1)\n",
    "    # Fit the combiner on the is_train == 1 rows joined with FLAG, then transform every row.\n",
    "    c = toad.transform.Combiner()\n",
    "    c.fit(BASE_train_TARGET, y = 'FLAG', method = 'chi', min_samples = 0.02, empty_separate = False)\n",
    "    data = c.transform(data, labels=False)\n",
    "\n",
    "    # Legal representative: row count and number of distinct type / industry / province codes per person.\n",
    "    legal_rep_stats = data.groupby('法定代表人').agg({'客户编号':'count', '企业（机构）类型编码': 'nunique', '国民经济行业代码': 'nunique', '所在省份编码': 'nunique'})\n",
    "    legal_rep_stats = legal_rep_stats.reset_index()\n",
    "    legal_rep_stats.columns = ['法定代表人', '法定代表人相关企业个数', '法人涉足企业类型', '法人涉足国民经济行业代码', '法人跨省个数']\n",
    "    data = data.merge(legal_rep_stats, how = 'left', on = '法定代表人')\n",
    "    data = data.drop(['数据日期', 'is_train', '经营期限至', '经营期限自', '成立日期', '法定代表人', '经营状态', '企业（机构）类型编码', '所在省份编码', '国民经济行业代码'], axis = 1)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "fb95e6ab-075f-42ef-bcf7-6864b0cd9d7b",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:20:07.562465Z",
     "iopub.status.busy": "2024-11-08T03:20:07.561889Z",
     "iopub.status.idle": "2024-11-08T03:20:26.660468Z",
     "msg_id": "361b2d1a-663c-485e-9de4-4c4cca9fc870",
     "shell.execute_reply": "2024-11-08T03:20:26.659749Z",
     "shell.execute_reply.started": "2024-11-08T03:20:07.562438Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 40)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>是否长期经营</th>\n",
       "      <th>经营成立时间是否相等</th>\n",
       "      <th>注册资金过小</th>\n",
       "      <th>经营是否已过期</th>\n",
       "      <th>剩余经营天数_天</th>\n",
       "      <th>已经营天数_天</th>\n",
       "      <th>当期经营期限总天数_天</th>\n",
       "      <th>自成立经营期限总天数_天</th>\n",
       "      <th>已成立天数_天</th>\n",
       "      <th>再次经营_天</th>\n",
       "      <th>剩余经营天数_月</th>\n",
       "      <th>已经营天数_月</th>\n",
       "      <th>当期经营期限总天数_月</th>\n",
       "      <th>自成立经营期限总天数_月</th>\n",
       "      <th>已成立天数_月</th>\n",
       "      <th>再次经营_月</th>\n",
       "      <th>剩余经营天数_年</th>\n",
       "      <th>已经营天数_年</th>\n",
       "      <th>当期经营期限总天数_年</th>\n",
       "      <th>自成立经营期限总天数_年</th>\n",
       "      <th>已成立天数_年</th>\n",
       "      <th>再次经营_年</th>\n",
       "      <th>经营状态_编码</th>\n",
       "      <th>企业（机构）类型编码_频数是否前10</th>\n",
       "      <th>企业（机构）类型编码_频数是否后20</th>\n",
       "      <th>企业（机构）类型编码_是否频数最高2类</th>\n",
       "      <th>所在省份编码_频数是否前5</th>\n",
       "      <th>所在省份编码_频数是否后5</th>\n",
       "      <th>企业（机构）类型编码_是否坏率最高2类</th>\n",
       "      <th>国民经济行业代码_频数是否前5</th>\n",
       "      <th>国民经济行业代码_频数是否后230</th>\n",
       "      <th>企业（机构）类型编码_分箱</th>\n",
       "      <th>所在省份编码_分箱</th>\n",
       "      <th>国民经济行业代码_分箱</th>\n",
       "      <th>法定代表人相关企业个数</th>\n",
       "      <th>法人涉足企业类型</th>\n",
       "      <th>法人涉足国民经济行业代码</th>\n",
       "      <th>法人跨省个数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>182d6a854532dd26a1b111e77bd501f4</td>\n",
       "      <td>690521.61</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>14574</td>\n",
       "      <td>3675</td>\n",
       "      <td>18249</td>\n",
       "      <td>18249</td>\n",
       "      <td>3675</td>\n",
       "      <td>0</td>\n",
       "      <td>480</td>\n",
       "      <td>120</td>\n",
       "      <td>600</td>\n",
       "      <td>600</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>50.000000</td>\n",
       "      <td>50.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>f60def7aa5dc124ddae552b7bf5c7675</td>\n",
       "      <td>345266.51</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>364059</td>\n",
       "      <td>3360</td>\n",
       "      <td>367419</td>\n",
       "      <td>367419</td>\n",
       "      <td>3360</td>\n",
       "      <td>0</td>\n",
       "      <td>11969</td>\n",
       "      <td>110</td>\n",
       "      <td>12079</td>\n",
       "      <td>12079</td>\n",
       "      <td>110</td>\n",
       "      <td>0</td>\n",
       "      <td>997.416667</td>\n",
       "      <td>9.166667</td>\n",
       "      <td>1006.583333</td>\n",
       "      <td>1006.583333</td>\n",
       "      <td>9.166667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号       注册资本  是否长期经营  经营成立时间是否相等  注册资金过小  \\\n",
       "0  182d6a854532dd26a1b111e77bd501f4  690521.61       0           1       0   \n",
       "1  f60def7aa5dc124ddae552b7bf5c7675  345266.51       1           1       0   \n",
       "\n",
       "   经营是否已过期  剩余经营天数_天  已经营天数_天  当期经营期限总天数_天  自成立经营期限总天数_天  已成立天数_天  再次经营_天  \\\n",
       "0        0     14574     3675        18249         18249     3675       0   \n",
       "1        0    364059     3360       367419        367419     3360       0   \n",
       "\n",
       "   剩余经营天数_月  已经营天数_月  当期经营期限总天数_月  自成立经营期限总天数_月  已成立天数_月  再次经营_月    剩余经营天数_年  \\\n",
       "0       480      120          600           600      120       0   40.000000   \n",
       "1     11969      110        12079         12079      110       0  997.416667   \n",
       "\n",
       "     已经营天数_年  当期经营期限总天数_年  自成立经营期限总天数_年    已成立天数_年  再次经营_年  经营状态_编码  \\\n",
       "0  10.000000    50.000000     50.000000  10.000000     0.0        1   \n",
       "1   9.166667  1006.583333   1006.583333   9.166667     0.0        1   \n",
       "\n",
       "   企业（机构）类型编码_频数是否前10  企业（机构）类型编码_频数是否后20  企业（机构）类型编码_是否频数最高2类  所在省份编码_频数是否前5  \\\n",
       "0                   1                   0                    1              0   \n",
       "1                   1                   0                    1              0   \n",
       "\n",
       "   所在省份编码_频数是否后5  企业（机构）类型编码_是否坏率最高2类  国民经济行业代码_频数是否前5  国民经济行业代码_频数是否后230  \\\n",
       "0              0                    0                0                  0   \n",
       "1              0                    0                0                  0   \n",
       "\n",
       "   企业（机构）类型编码_分箱  所在省份编码_分箱  国民经济行业代码_分箱  法定代表人相关企业个数  法人涉足企业类型  法人涉足国民经济行业代码  \\\n",
       "0              1          3            2            1         1             1   \n",
       "1              1          3            4            1         1             1   \n",
       "\n",
       "   法人跨省个数  \n",
       "0       1  \n",
       "1       1  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the business features, then show the result's shape and first two rows.\n",
    "basic_info = BASIC_info()\n",
    "print(basic_info.shape)\n",
    "basic_info.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71f03b96-2668-441e-8d99-4df9a87a06e4",
   "metadata": {},
   "source": [
    "# 基本信息表文本特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c1f1bbd0-2df8-48e3-a1e7-689326422525",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:20:26.662199Z",
     "iopub.status.busy": "2024-11-08T03:20:26.661846Z",
     "iopub.status.idle": "2024-11-08T03:20:26.671551Z",
     "msg_id": "1154f784-67cc-4392-ba36-0ba0b1f0b659",
     "shell.execute_reply": "2024-11-08T03:20:26.670785Z",
     "shell.execute_reply.started": "2024-11-08T03:20:26.662171Z"
    }
   },
   "outputs": [],
   "source": [
    "def BASIC_text():\n",
    "    \"\"\"Assemble the text-style features for the categorical basic-info columns.\n",
    "\n",
    "    Each source column is passed to text_feats (num=10) and to word2vec_feature\n",
    "    (ext='B'); every result is left-joined on '客户编号' onto the rows of\n",
    "    'XW_ENTINFO_TARGET'. The same is then done for three pairwise `+`\n",
    "    combinations of the code columns.\n",
    "\n",
    "    Returns:\n",
    "        The target table without '数据日期', 'is_train' and 'FLAG', extended with\n",
    "        every merged feature column.\n",
    "    \"\"\"\n",
    "    key = '客户编号'\n",
    "    basic = get_data('XW_ENTINFO_BASIC', num_rows=None)\n",
    "    basic.columns = ['数据日期', '客户编号', '经营期限至', '经营期限自', '经营状态', '注册资本', '成立日期','法定代表人', '企业（机构）类型编码', '所在省份编码', '国民经济行业代码', 'is_train']\n",
    "    features = get_data('XW_ENTINFO_TARGET', num_rows=None).drop(['数据日期'], axis=1)\n",
    "\n",
    "    def attach(extra):\n",
    "        # Left join, so every row of the target table is kept.\n",
    "        return features.merge(extra, on=key, how='left')\n",
    "\n",
    "    single_columns = ['法定代表人', '企业（机构）类型编码', '所在省份编码', '国民经济行业代码']\n",
    "    # All text_feats blocks first, then all word2vec blocks, which fixes the resulting column order.\n",
    "    for column in single_columns:\n",
    "        features = attach(text_feats(basic, key, column, num=10))\n",
    "    for column in single_columns:\n",
    "        features = attach(word2vec_feature(basic, key, column, ext='B'))\n",
    "\n",
    "    # Crossed columns: each one is the `+` of two code columns; all are added before any is used.\n",
    "    crosses = {\n",
    "        '企业机构类型_所在省份': ('企业（机构）类型编码', '所在省份编码'),\n",
    "        '企业机构类型_国民经济行业代码': ('企业（机构）类型编码', '国民经济行业代码'),\n",
    "        '所在省份_国民经济行业代码': ('所在省份编码', '国民经济行业代码'),\n",
    "    }\n",
    "    for name, (left, right) in crosses.items():\n",
    "        basic[name] = basic[left] + basic[right]\n",
    "    for name in crosses:\n",
    "        features = attach(text_feats(basic, key, name, num=10))\n",
    "        features = attach(word2vec_feature(basic, key, name, ext='B'))\n",
    "\n",
    "    return features.drop(['is_train', 'FLAG'], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6e4aa703-8e52-456c-ad69-ca06a98b463c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:20:26.672785Z",
     "iopub.status.busy": "2024-11-08T03:20:26.672406Z",
     "iopub.status.idle": "2024-11-08T03:21:21.657734Z",
     "msg_id": "ff5cf61a-7226-4f7f-8b26-fb2e8fe2b4a1",
     "shell.execute_reply": "2024-11-08T03:21:21.656974Z",
     "shell.execute_reply.started": "2024-11-08T03:20:26.672759Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 197)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>法定代表人_tfidf_0</th>\n",
       "      <th>法定代表人_tfidf_1</th>\n",
       "      <th>法定代表人_tfidf_2</th>\n",
       "      <th>法定代表人_tfidf_3</th>\n",
       "      <th>法定代表人_tfidf_4</th>\n",
       "      <th>法定代表人_tfidf_5</th>\n",
       "      <th>法定代表人_tfidf_6</th>\n",
       "      <th>法定代表人_tfidf_7</th>\n",
       "      <th>法定代表人_tfidf_8</th>\n",
       "      <th>法定代表人_tfidf_9</th>\n",
       "      <th>法定代表人_countvec_0</th>\n",
       "      <th>法定代表人_countvec_1</th>\n",
       "      <th>法定代表人_countvec_2</th>\n",
       "      <th>法定代表人_countvec_3</th>\n",
       "      <th>法定代表人_countvec_4</th>\n",
       "      <th>法定代表人_countvec_5</th>\n",
       "      <th>法定代表人_countvec_6</th>\n",
       "      <th>法定代表人_countvec_7</th>\n",
       "      <th>法定代表人_countvec_8</th>\n",
       "      <th>法定代表人_countvec_9</th>\n",
       "      <th>企业（机构）类型编码_tfidf_0</th>\n",
       "      <th>企业（机构）类型编码_tfidf_1</th>\n",
       "      <th>企业（机构）类型编码_tfidf_2</th>\n",
       "      <th>企业（机构）类型编码_tfidf_3</th>\n",
       "      <th>企业（机构）类型编码_tfidf_4</th>\n",
       "      <th>企业（机构）类型编码_tfidf_5</th>\n",
       "      <th>企业（机构）类型编码_tfidf_6</th>\n",
       "      <th>企业（机构）类型编码_tfidf_7</th>\n",
       "      <th>企业（机构）类型编码_tfidf_8</th>\n",
       "      <th>企业（机构）类型编码_tfidf_9</th>\n",
       "      <th>企业（机构）类型编码_countvec_0</th>\n",
       "      <th>企业（机构）类型编码_countvec_1</th>\n",
       "      <th>企业（机构）类型编码_countvec_2</th>\n",
       "      <th>企业（机构）类型编码_countvec_3</th>\n",
       "      <th>企业（机构）类型编码_countvec_4</th>\n",
       "      <th>企业（机构）类型编码_countvec_5</th>\n",
       "      <th>企业（机构）类型编码_countvec_6</th>\n",
       "      <th>企业（机构）类型编码_countvec_7</th>\n",
       "      <th>企业（机构）类型编码_countvec_8</th>\n",
       "      <th>企业（机构）类型编码_countvec_9</th>\n",
       "      <th>所在省份编码_tfidf_0</th>\n",
       "      <th>所在省份编码_tfidf_1</th>\n",
       "      <th>所在省份编码_tfidf_2</th>\n",
       "      <th>所在省份编码_tfidf_3</th>\n",
       "      <th>所在省份编码_tfidf_4</th>\n",
       "      <th>所在省份编码_tfidf_5</th>\n",
       "      <th>所在省份编码_tfidf_6</th>\n",
       "      <th>所在省份编码_tfidf_7</th>\n",
       "      <th>所在省份编码_tfidf_8</th>\n",
       "      <th>所在省份编码_tfidf_9</th>\n",
       "      <th>所在省份编码_countvec_0</th>\n",
       "      <th>所在省份编码_countvec_1</th>\n",
       "      <th>所在省份编码_countvec_2</th>\n",
       "      <th>所在省份编码_countvec_3</th>\n",
       "      <th>所在省份编码_countvec_4</th>\n",
       "      <th>所在省份编码_countvec_5</th>\n",
       "      <th>所在省份编码_countvec_6</th>\n",
       "      <th>所在省份编码_countvec_7</th>\n",
       "      <th>所在省份编码_countvec_8</th>\n",
       "      <th>所在省份编码_countvec_9</th>\n",
       "      <th>国民经济行业代码_tfidf_0</th>\n",
       "      <th>国民经济行业代码_tfidf_1</th>\n",
       "      <th>国民经济行业代码_tfidf_2</th>\n",
       "      <th>国民经济行业代码_tfidf_3</th>\n",
       "      <th>国民经济行业代码_tfidf_4</th>\n",
       "      <th>国民经济行业代码_tfidf_5</th>\n",
       "      <th>国民经济行业代码_tfidf_6</th>\n",
       "      <th>国民经济行业代码_tfidf_7</th>\n",
       "      <th>国民经济行业代码_tfidf_8</th>\n",
       "      <th>国民经济行业代码_tfidf_9</th>\n",
       "      <th>国民经济行业代码_countvec_0</th>\n",
       "      <th>国民经济行业代码_countvec_1</th>\n",
       "      <th>国民经济行业代码_countvec_2</th>\n",
       "      <th>国民经济行业代码_countvec_3</th>\n",
       "      <th>国民经济行业代码_countvec_4</th>\n",
       "      <th>国民经济行业代码_countvec_5</th>\n",
       "      <th>国民经济行业代码_countvec_6</th>\n",
       "      <th>国民经济行业代码_countvec_7</th>\n",
       "      <th>国民经济行业代码_countvec_8</th>\n",
       "      <th>国民经济行业代码_countvec_9</th>\n",
       "      <th>客户编号_法定代表人_w2v_0</th>\n",
       "      <th>客户编号_法定代表人_w2v_1</th>\n",
       "      <th>客户编号_法定代表人_w2v_2</th>\n",
       "      <th>客户编号_法定代表人_w2v_3</th>\n",
       "      <th>客户编号_法定代表人_w2v_4</th>\n",
       "      <th>客户编号_法定代表人_w2v_5</th>\n",
       "      <th>客户编号_法定代表人_w2v_6</th>\n",
       "      <th>客户编号_法定代表人_w2v_7</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_0</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_1</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_2</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_3</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_4</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_5</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_6</th>\n",
       "      <th>客户编号_企业（机构）类型编码_w2v_7</th>\n",
       "      <th>客户编号_所在省份编码_w2v_0</th>\n",
       "      <th>客户编号_所在省份编码_w2v_1</th>\n",
       "      <th>客户编号_所在省份编码_w2v_2</th>\n",
       "      <th>客户编号_所在省份编码_w2v_3</th>\n",
       "      <th>客户编号_所在省份编码_w2v_4</th>\n",
       "      <th>客户编号_所在省份编码_w2v_5</th>\n",
       "      <th>客户编号_所在省份编码_w2v_6</th>\n",
       "      <th>客户编号_所在省份编码_w2v_7</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_0</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_1</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_2</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_3</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_4</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_5</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_6</th>\n",
       "      <th>客户编号_国民经济行业代码_w2v_7</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_0</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_1</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_2</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_3</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_4</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_5</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_6</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_7</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_8</th>\n",
       "      <th>企业机构类型_所在省份_tfidf_9</th>\n",
       "      <th>企业机构类型_所在省份_countvec_0</th>\n",
       "      <th>企业机构类型_所在省份_countvec_1</th>\n",
       "      <th>企业机构类型_所在省份_countvec_2</th>\n",
       "      <th>企业机构类型_所在省份_countvec_3</th>\n",
       "      <th>企业机构类型_所在省份_countvec_4</th>\n",
       "      <th>企业机构类型_所在省份_countvec_5</th>\n",
       "      <th>企业机构类型_所在省份_countvec_6</th>\n",
       "      <th>企业机构类型_所在省份_countvec_7</th>\n",
       "      <th>企业机构类型_所在省份_countvec_8</th>\n",
       "      <th>企业机构类型_所在省份_countvec_9</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_0</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_1</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_2</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_3</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_4</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_5</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_6</th>\n",
       "      <th>客户编号_企业机构类型_所在省份_w2v_7</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_0</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_1</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_2</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_3</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_4</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_5</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_6</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_7</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_8</th>\n",
       "      <th>企业机构类型_国民经济行业代码_tfidf_9</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_0</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_1</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_2</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_3</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_4</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_5</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_6</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_7</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_8</th>\n",
       "      <th>企业机构类型_国民经济行业代码_countvec_9</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_0</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_1</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_2</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_3</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_4</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_5</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_6</th>\n",
       "      <th>客户编号_企业机构类型_国民经济行业代码_w2v_7</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_0</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_1</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_2</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_3</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_4</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_5</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_6</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_7</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_8</th>\n",
       "      <th>所在省份_国民经济行业代码_tfidf_9</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_0</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_1</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_2</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_3</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_4</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_5</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_6</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_7</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_8</th>\n",
       "      <th>所在省份_国民经济行业代码_countvec_9</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_0</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_1</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_2</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_3</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_4</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_5</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_6</th>\n",
       "      <th>客户编号_所在省份_国民经济行业代码_w2v_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>3.112701e-32</td>\n",
       "      <td>-3.182684e-31</td>\n",
       "      <td>1.459411e-30</td>\n",
       "      <td>-3.919896e-30</td>\n",
       "      <td>-1.098859e-30</td>\n",
       "      <td>-2.575129e-30</td>\n",
       "      <td>1.086710e-29</td>\n",
       "      <td>-1.080662e-29</td>\n",
       "      <td>-1.030773e-29</td>\n",
       "      <td>-6.907220e-30</td>\n",
       "      <td>3.112701e-32</td>\n",
       "      <td>-3.182684e-31</td>\n",
       "      <td>1.459411e-30</td>\n",
       "      <td>-3.919896e-30</td>\n",
       "      <td>-1.098859e-30</td>\n",
       "      <td>-2.575129e-30</td>\n",
       "      <td>1.086710e-29</td>\n",
       "      <td>-1.080662e-29</td>\n",
       "      <td>-1.030773e-29</td>\n",
       "      <td>-6.907220e-30</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>7.015789e-23</td>\n",
       "      <td>1.241996e-19</td>\n",
       "      <td>2.034428e-19</td>\n",
       "      <td>4.869705e-16</td>\n",
       "      <td>-2.006510e-15</td>\n",
       "      <td>6.435109e-14</td>\n",
       "      <td>-2.805796e-14</td>\n",
       "      <td>1.106833e-13</td>\n",
       "      <td>-1.226809e-13</td>\n",
       "      <td>2.376093e-12</td>\n",
       "      <td>7.015789e-23</td>\n",
       "      <td>1.241996e-19</td>\n",
       "      <td>2.034428e-19</td>\n",
       "      <td>4.869705e-16</td>\n",
       "      <td>-2.006510e-15</td>\n",
       "      <td>6.435109e-14</td>\n",
       "      <td>-2.805796e-14</td>\n",
       "      <td>1.106833e-13</td>\n",
       "      <td>-1.226809e-13</td>\n",
       "      <td>2.376093e-12</td>\n",
       "      <td>0.009137</td>\n",
       "      <td>0.013954</td>\n",
       "      <td>0.106729</td>\n",
       "      <td>0.058506</td>\n",
       "      <td>0.086638</td>\n",
       "      <td>-0.081813</td>\n",
       "      <td>0.032129</td>\n",
       "      <td>-0.050155</td>\n",
       "      <td>-0.102687</td>\n",
       "      <td>0.068489</td>\n",
       "      <td>0.038643</td>\n",
       "      <td>-0.015280</td>\n",
       "      <td>-0.016746</td>\n",
       "      <td>0.089649</td>\n",
       "      <td>-0.103514</td>\n",
       "      <td>0.049342</td>\n",
       "      <td>0.110077</td>\n",
       "      <td>-0.015712</td>\n",
       "      <td>-0.084828</td>\n",
       "      <td>0.08317</td>\n",
       "      <td>0.032441</td>\n",
       "      <td>0.050066</td>\n",
       "      <td>-0.100693</td>\n",
       "      <td>-0.046908</td>\n",
       "      <td>-0.053017</td>\n",
       "      <td>0.082357</td>\n",
       "      <td>-0.062372</td>\n",
       "      <td>0.074204</td>\n",
       "      <td>-0.009248</td>\n",
       "      <td>-0.066840</td>\n",
       "      <td>-0.041439</td>\n",
       "      <td>0.007692</td>\n",
       "      <td>2.365204e-17</td>\n",
       "      <td>-7.321048e-14</td>\n",
       "      <td>9.643517e-13</td>\n",
       "      <td>-2.287955e-10</td>\n",
       "      <td>2.170271e-07</td>\n",
       "      <td>5.775978e-07</td>\n",
       "      <td>-2.031689e-06</td>\n",
       "      <td>-3.200340e-06</td>\n",
       "      <td>0.000001</td>\n",
       "      <td>-3.319528e-06</td>\n",
       "      <td>2.365204e-17</td>\n",
       "      <td>-7.321048e-14</td>\n",
       "      <td>9.643517e-13</td>\n",
       "      <td>-2.287955e-10</td>\n",
       "      <td>2.170271e-07</td>\n",
       "      <td>5.775978e-07</td>\n",
       "      <td>-2.031689e-06</td>\n",
       "      <td>-3.200340e-06</td>\n",
       "      <td>0.000001</td>\n",
       "      <td>-3.319528e-06</td>\n",
       "      <td>0.018650</td>\n",
       "      <td>0.033680</td>\n",
       "      <td>0.016343</td>\n",
       "      <td>0.013395</td>\n",
       "      <td>-0.102358</td>\n",
       "      <td>0.014802</td>\n",
       "      <td>0.073728</td>\n",
       "      <td>-0.049012</td>\n",
       "      <td>-8.281430e-24</td>\n",
       "      <td>7.698298e-22</td>\n",
       "      <td>1.886886e-20</td>\n",
       "      <td>-1.999001e-19</td>\n",
       "      <td>3.297910e-18</td>\n",
       "      <td>4.061767e-18</td>\n",
       "      <td>-8.337480e-17</td>\n",
       "      <td>1.295573e-15</td>\n",
       "      <td>7.191130e-16</td>\n",
       "      <td>3.338126e-15</td>\n",
       "      <td>-8.281430e-24</td>\n",
       "      <td>7.698298e-22</td>\n",
       "      <td>1.886886e-20</td>\n",
       "      <td>-1.999001e-19</td>\n",
       "      <td>3.297910e-18</td>\n",
       "      <td>4.061767e-18</td>\n",
       "      <td>-8.337480e-17</td>\n",
       "      <td>1.295573e-15</td>\n",
       "      <td>7.191130e-16</td>\n",
       "      <td>3.338126e-15</td>\n",
       "      <td>-0.027268</td>\n",
       "      <td>0.124259</td>\n",
       "      <td>-0.113957</td>\n",
       "      <td>0.094800</td>\n",
       "      <td>0.046124</td>\n",
       "      <td>-0.054023</td>\n",
       "      <td>0.069497</td>\n",
       "      <td>0.084224</td>\n",
       "      <td>8.658229e-27</td>\n",
       "      <td>2.917365e-22</td>\n",
       "      <td>8.402205e-22</td>\n",
       "      <td>-2.632837e-19</td>\n",
       "      <td>-3.262675e-18</td>\n",
       "      <td>-5.255084e-18</td>\n",
       "      <td>7.503772e-18</td>\n",
       "      <td>-1.026334e-17</td>\n",
       "      <td>2.586156e-16</td>\n",
       "      <td>3.842865e-17</td>\n",
       "      <td>8.658229e-27</td>\n",
       "      <td>2.917365e-22</td>\n",
       "      <td>8.402205e-22</td>\n",
       "      <td>-2.632837e-19</td>\n",
       "      <td>-3.262675e-18</td>\n",
       "      <td>-5.255084e-18</td>\n",
       "      <td>7.503772e-18</td>\n",
       "      <td>-1.026334e-17</td>\n",
       "      <td>2.586156e-16</td>\n",
       "      <td>3.842865e-17</td>\n",
       "      <td>-0.014348</td>\n",
       "      <td>-0.113956</td>\n",
       "      <td>-0.072508</td>\n",
       "      <td>-0.024684</td>\n",
       "      <td>-0.109418</td>\n",
       "      <td>-0.044016</td>\n",
       "      <td>-0.007899</td>\n",
       "      <td>0.112161</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b1d244a25a82adb7beafe33fe971402c</td>\n",
       "      <td>-1.410392e-32</td>\n",
       "      <td>1.273339e-31</td>\n",
       "      <td>-8.141020e-32</td>\n",
       "      <td>2.163897e-30</td>\n",
       "      <td>1.331409e-31</td>\n",
       "      <td>-6.148483e-31</td>\n",
       "      <td>1.653255e-30</td>\n",
       "      <td>2.443203e-30</td>\n",
       "      <td>7.139733e-31</td>\n",
       "      <td>1.090951e-29</td>\n",
       "      <td>-1.410392e-32</td>\n",
       "      <td>1.273339e-31</td>\n",
       "      <td>-8.141020e-32</td>\n",
       "      <td>2.163897e-30</td>\n",
       "      <td>1.331409e-31</td>\n",
       "      <td>-6.148483e-31</td>\n",
       "      <td>1.653255e-30</td>\n",
       "      <td>2.443203e-30</td>\n",
       "      <td>7.139733e-31</td>\n",
       "      <td>1.090951e-29</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>1.191350e-19</td>\n",
       "      <td>8.202335e-20</td>\n",
       "      <td>-7.986753e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.931267e-13</td>\n",
       "      <td>1.685272e-14</td>\n",
       "      <td>2.963131e-15</td>\n",
       "      <td>9.738470e-20</td>\n",
       "      <td>2.085465e-17</td>\n",
       "      <td>-3.055759e-19</td>\n",
       "      <td>1.191350e-19</td>\n",
       "      <td>8.202335e-20</td>\n",
       "      <td>-7.986753e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.931267e-13</td>\n",
       "      <td>1.685272e-14</td>\n",
       "      <td>2.963131e-15</td>\n",
       "      <td>9.738470e-20</td>\n",
       "      <td>2.085465e-17</td>\n",
       "      <td>-3.055759e-19</td>\n",
       "      <td>-1.184941e-27</td>\n",
       "      <td>5.309419e-26</td>\n",
       "      <td>1.584859e-26</td>\n",
       "      <td>1.650859e-21</td>\n",
       "      <td>3.000077e-21</td>\n",
       "      <td>-2.644135e-19</td>\n",
       "      <td>-7.597950e-19</td>\n",
       "      <td>-3.009897e-19</td>\n",
       "      <td>-2.608923e-21</td>\n",
       "      <td>1.679002e-17</td>\n",
       "      <td>-1.184941e-27</td>\n",
       "      <td>5.309419e-26</td>\n",
       "      <td>1.584859e-26</td>\n",
       "      <td>1.650859e-21</td>\n",
       "      <td>3.000077e-21</td>\n",
       "      <td>-2.644135e-19</td>\n",
       "      <td>-7.597950e-19</td>\n",
       "      <td>-3.009897e-19</td>\n",
       "      <td>-2.608923e-21</td>\n",
       "      <td>1.679002e-17</td>\n",
       "      <td>0.117057</td>\n",
       "      <td>0.019628</td>\n",
       "      <td>0.047099</td>\n",
       "      <td>0.018922</td>\n",
       "      <td>0.110582</td>\n",
       "      <td>-0.106525</td>\n",
       "      <td>-0.062890</td>\n",
       "      <td>-0.053150</td>\n",
       "      <td>-0.074633</td>\n",
       "      <td>-0.101456</td>\n",
       "      <td>0.006620</td>\n",
       "      <td>0.118906</td>\n",
       "      <td>0.058938</td>\n",
       "      <td>0.065285</td>\n",
       "      <td>0.054369</td>\n",
       "      <td>0.071516</td>\n",
       "      <td>0.070392</td>\n",
       "      <td>0.035966</td>\n",
       "      <td>-0.024396</td>\n",
       "      <td>0.08069</td>\n",
       "      <td>0.011357</td>\n",
       "      <td>-0.014146</td>\n",
       "      <td>-0.012385</td>\n",
       "      <td>-0.068190</td>\n",
       "      <td>-0.020408</td>\n",
       "      <td>0.019504</td>\n",
       "      <td>0.009387</td>\n",
       "      <td>0.049944</td>\n",
       "      <td>-0.060627</td>\n",
       "      <td>0.037308</td>\n",
       "      <td>-0.014331</td>\n",
       "      <td>0.110149</td>\n",
       "      <td>-9.652560e-20</td>\n",
       "      <td>1.421795e-16</td>\n",
       "      <td>-1.145915e-15</td>\n",
       "      <td>2.706533e-13</td>\n",
       "      <td>-7.546618e-10</td>\n",
       "      <td>-1.734925e-09</td>\n",
       "      <td>1.169652e-08</td>\n",
       "      <td>3.312179e-08</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-1.532140e-07</td>\n",
       "      <td>-9.652560e-20</td>\n",
       "      <td>1.421795e-16</td>\n",
       "      <td>-1.145915e-15</td>\n",
       "      <td>2.706533e-13</td>\n",
       "      <td>-7.546618e-10</td>\n",
       "      <td>-1.734925e-09</td>\n",
       "      <td>1.169652e-08</td>\n",
       "      <td>3.312179e-08</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-1.532140e-07</td>\n",
       "      <td>-0.022287</td>\n",
       "      <td>-0.043544</td>\n",
       "      <td>0.101384</td>\n",
       "      <td>-0.032385</td>\n",
       "      <td>-0.105914</td>\n",
       "      <td>-0.007611</td>\n",
       "      <td>0.073920</td>\n",
       "      <td>-0.077632</td>\n",
       "      <td>-4.236050e-30</td>\n",
       "      <td>-3.099192e-28</td>\n",
       "      <td>3.908434e-27</td>\n",
       "      <td>2.143316e-27</td>\n",
       "      <td>9.712234e-25</td>\n",
       "      <td>8.688776e-25</td>\n",
       "      <td>4.776915e-23</td>\n",
       "      <td>-1.601149e-23</td>\n",
       "      <td>2.631376e-22</td>\n",
       "      <td>-5.958599e-22</td>\n",
       "      <td>-4.236050e-30</td>\n",
       "      <td>-3.099192e-28</td>\n",
       "      <td>3.908434e-27</td>\n",
       "      <td>2.143316e-27</td>\n",
       "      <td>9.712234e-25</td>\n",
       "      <td>8.688776e-25</td>\n",
       "      <td>4.776915e-23</td>\n",
       "      <td>-1.601149e-23</td>\n",
       "      <td>2.631376e-22</td>\n",
       "      <td>-5.958599e-22</td>\n",
       "      <td>-0.026679</td>\n",
       "      <td>0.009142</td>\n",
       "      <td>0.081365</td>\n",
       "      <td>-0.016359</td>\n",
       "      <td>-0.034656</td>\n",
       "      <td>-0.092061</td>\n",
       "      <td>-0.035071</td>\n",
       "      <td>-0.093584</td>\n",
       "      <td>2.073874e-29</td>\n",
       "      <td>3.166712e-26</td>\n",
       "      <td>2.881067e-25</td>\n",
       "      <td>7.010090e-23</td>\n",
       "      <td>5.482755e-23</td>\n",
       "      <td>-4.520141e-22</td>\n",
       "      <td>5.630544e-22</td>\n",
       "      <td>7.432626e-23</td>\n",
       "      <td>-4.792102e-21</td>\n",
       "      <td>4.181161e-21</td>\n",
       "      <td>2.073874e-29</td>\n",
       "      <td>3.166712e-26</td>\n",
       "      <td>2.881067e-25</td>\n",
       "      <td>7.010090e-23</td>\n",
       "      <td>5.482755e-23</td>\n",
       "      <td>-4.520141e-22</td>\n",
       "      <td>5.630544e-22</td>\n",
       "      <td>7.432626e-23</td>\n",
       "      <td>-4.792102e-21</td>\n",
       "      <td>4.181161e-21</td>\n",
       "      <td>-0.045867</td>\n",
       "      <td>-0.059138</td>\n",
       "      <td>-0.083495</td>\n",
       "      <td>0.107400</td>\n",
       "      <td>0.110016</td>\n",
       "      <td>-0.096203</td>\n",
       "      <td>-0.036687</td>\n",
       "      <td>0.015966</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号  法定代表人_tfidf_0  法定代表人_tfidf_1  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9   3.112701e-32  -3.182684e-31   \n",
       "1  b1d244a25a82adb7beafe33fe971402c  -1.410392e-32   1.273339e-31   \n",
       "\n",
       "   法定代表人_tfidf_2  法定代表人_tfidf_3  法定代表人_tfidf_4  法定代表人_tfidf_5  法定代表人_tfidf_6  \\\n",
       "0   1.459411e-30  -3.919896e-30  -1.098859e-30  -2.575129e-30   1.086710e-29   \n",
       "1  -8.141020e-32   2.163897e-30   1.331409e-31  -6.148483e-31   1.653255e-30   \n",
       "\n",
       "   法定代表人_tfidf_7  法定代表人_tfidf_8  法定代表人_tfidf_9  法定代表人_countvec_0  \\\n",
       "0  -1.080662e-29  -1.030773e-29  -6.907220e-30      3.112701e-32   \n",
       "1   2.443203e-30   7.139733e-31   1.090951e-29     -1.410392e-32   \n",
       "\n",
       "   法定代表人_countvec_1  法定代表人_countvec_2  法定代表人_countvec_3  法定代表人_countvec_4  \\\n",
       "0     -3.182684e-31      1.459411e-30     -3.919896e-30     -1.098859e-30   \n",
       "1      1.273339e-31     -8.141020e-32      2.163897e-30      1.331409e-31   \n",
       "\n",
       "   法定代表人_countvec_5  法定代表人_countvec_6  法定代表人_countvec_7  法定代表人_countvec_8  \\\n",
       "0     -2.575129e-30      1.086710e-29     -1.080662e-29     -1.030773e-29   \n",
       "1     -6.148483e-31      1.653255e-30      2.443203e-30      7.139733e-31   \n",
       "\n",
       "   法定代表人_countvec_9  企业（机构）类型编码_tfidf_0  企业（机构）类型编码_tfidf_1  \\\n",
       "0     -6.907220e-30        1.000000e+00       -2.426068e-17   \n",
       "1      1.090951e-29        2.426068e-17        1.000000e+00   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_2  企业（机构）类型编码_tfidf_3  企业（机构）类型编码_tfidf_4  \\\n",
       "0       -1.053226e-21       -1.867483e-28        9.566782e-33   \n",
       "1        1.346388e-16        2.266380e-23       -1.150903e-27   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_5  企业（机构）类型编码_tfidf_6  企业（机构）类型编码_tfidf_7  \\\n",
       "0        1.061066e-34        6.700930e-34        1.710763e-32   \n",
       "1       -1.622419e-29       -4.599464e-29       -1.176252e-27   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_8  企业（机构）类型编码_tfidf_9  企业（机构）类型编码_countvec_0  \\\n",
       "0        9.444267e-36       -9.671025e-36           1.000000e+00   \n",
       "1       -6.493528e-31        6.649440e-31           2.426068e-17   \n",
       "\n",
       "   企业（机构）类型编码_countvec_1  企业（机构）类型编码_countvec_2  企业（机构）类型编码_countvec_3  \\\n",
       "0          -2.426068e-17          -1.053226e-21          -1.867483e-28   \n",
       "1           1.000000e+00           1.346388e-16           2.266380e-23   \n",
       "\n",
       "   企业（机构）类型编码_countvec_4  企业（机构）类型编码_countvec_5  企业（机构）类型编码_countvec_6  \\\n",
       "0           9.566782e-33           1.061066e-34           6.700930e-34   \n",
       "1          -1.150903e-27          -1.622419e-29          -4.599464e-29   \n",
       "\n",
       "   企业（机构）类型编码_countvec_7  企业（机构）类型编码_countvec_8  企业（机构）类型编码_countvec_9  \\\n",
       "0           1.710763e-32           9.444267e-36          -9.671025e-36   \n",
       "1          -1.176252e-27          -6.493528e-31           6.649440e-31   \n",
       "\n",
       "   所在省份编码_tfidf_0  所在省份编码_tfidf_1  所在省份编码_tfidf_2  所在省份编码_tfidf_3  \\\n",
       "0    8.890848e-20    5.167360e-20    3.042293e-21    1.043209e-19   \n",
       "1    1.191350e-19    8.202335e-20   -7.986753e-17    1.000000e+00   \n",
       "\n",
       "   所在省份编码_tfidf_4  所在省份编码_tfidf_5  所在省份编码_tfidf_6  所在省份编码_tfidf_7  \\\n",
       "0    1.619241e-18    1.076051e-17    5.214132e-17   -3.663833e-16   \n",
       "1    1.931267e-13    1.685272e-14    2.963131e-15    9.738470e-20   \n",
       "\n",
       "   所在省份编码_tfidf_8  所在省份编码_tfidf_9  所在省份编码_countvec_0  所在省份编码_countvec_1  \\\n",
       "0   -2.029394e-15    1.134680e-13       8.890848e-20       5.167360e-20   \n",
       "1    2.085465e-17   -3.055759e-19       1.191350e-19       8.202335e-20   \n",
       "\n",
       "   所在省份编码_countvec_2  所在省份编码_countvec_3  所在省份编码_countvec_4  所在省份编码_countvec_5  \\\n",
       "0       3.042293e-21       1.043209e-19       1.619241e-18       1.076051e-17   \n",
       "1      -7.986753e-17       1.000000e+00       1.931267e-13       1.685272e-14   \n",
       "\n",
       "   所在省份编码_countvec_6  所在省份编码_countvec_7  所在省份编码_countvec_8  所在省份编码_countvec_9  \\\n",
       "0       5.214132e-17      -3.663833e-16      -2.029394e-15       1.134680e-13   \n",
       "1       2.963131e-15       9.738470e-20       2.085465e-17      -3.055759e-19   \n",
       "\n",
       "   国民经济行业代码_tfidf_0  国民经济行业代码_tfidf_1  国民经济行业代码_tfidf_2  国民经济行业代码_tfidf_3  \\\n",
       "0      7.015789e-23      1.241996e-19      2.034428e-19      4.869705e-16   \n",
       "1     -1.184941e-27      5.309419e-26      1.584859e-26      1.650859e-21   \n",
       "\n",
       "   国民经济行业代码_tfidf_4  国民经济行业代码_tfidf_5  国民经济行业代码_tfidf_6  国民经济行业代码_tfidf_7  \\\n",
       "0     -2.006510e-15      6.435109e-14     -2.805796e-14      1.106833e-13   \n",
       "1      3.000077e-21     -2.644135e-19     -7.597950e-19     -3.009897e-19   \n",
       "\n",
       "   国民经济行业代码_tfidf_8  国民经济行业代码_tfidf_9  国民经济行业代码_countvec_0  \\\n",
       "0     -1.226809e-13      2.376093e-12         7.015789e-23   \n",
       "1     -2.608923e-21      1.679002e-17        -1.184941e-27   \n",
       "\n",
       "   国民经济行业代码_countvec_1  国民经济行业代码_countvec_2  国民经济行业代码_countvec_3  \\\n",
       "0         1.241996e-19         2.034428e-19         4.869705e-16   \n",
       "1         5.309419e-26         1.584859e-26         1.650859e-21   \n",
       "\n",
       "   国民经济行业代码_countvec_4  国民经济行业代码_countvec_5  国民经济行业代码_countvec_6  \\\n",
       "0        -2.006510e-15         6.435109e-14        -2.805796e-14   \n",
       "1         3.000077e-21        -2.644135e-19        -7.597950e-19   \n",
       "\n",
       "   国民经济行业代码_countvec_7  国民经济行业代码_countvec_8  国民经济行业代码_countvec_9  \\\n",
       "0         1.106833e-13        -1.226809e-13         2.376093e-12   \n",
       "1        -3.009897e-19        -2.608923e-21         1.679002e-17   \n",
       "\n",
       "   客户编号_法定代表人_w2v_0  客户编号_法定代表人_w2v_1  客户编号_法定代表人_w2v_2  客户编号_法定代表人_w2v_3  \\\n",
       "0          0.009137          0.013954          0.106729          0.058506   \n",
       "1          0.117057          0.019628          0.047099          0.018922   \n",
       "\n",
       "   客户编号_法定代表人_w2v_4  客户编号_法定代表人_w2v_5  客户编号_法定代表人_w2v_6  客户编号_法定代表人_w2v_7  \\\n",
       "0          0.086638         -0.081813          0.032129         -0.050155   \n",
       "1          0.110582         -0.106525         -0.062890         -0.053150   \n",
       "\n",
       "   客户编号_企业（机构）类型编码_w2v_0  客户编号_企业（机构）类型编码_w2v_1  客户编号_企业（机构）类型编码_w2v_2  \\\n",
       "0              -0.102687               0.068489               0.038643   \n",
       "1              -0.074633              -0.101456               0.006620   \n",
       "\n",
       "   客户编号_企业（机构）类型编码_w2v_3  客户编号_企业（机构）类型编码_w2v_4  客户编号_企业（机构）类型编码_w2v_5  \\\n",
       "0              -0.015280              -0.016746               0.089649   \n",
       "1               0.118906               0.058938               0.065285   \n",
       "\n",
       "   客户编号_企业（机构）类型编码_w2v_6  客户编号_企业（机构）类型编码_w2v_7  客户编号_所在省份编码_w2v_0  \\\n",
       "0              -0.103514               0.049342           0.110077   \n",
       "1               0.054369               0.071516           0.070392   \n",
       "\n",
       "   客户编号_所在省份编码_w2v_1  客户编号_所在省份编码_w2v_2  客户编号_所在省份编码_w2v_3  客户编号_所在省份编码_w2v_4  \\\n",
       "0          -0.015712          -0.084828            0.08317           0.032441   \n",
       "1           0.035966          -0.024396            0.08069           0.011357   \n",
       "\n",
       "   客户编号_所在省份编码_w2v_5  客户编号_所在省份编码_w2v_6  客户编号_所在省份编码_w2v_7  \\\n",
       "0           0.050066          -0.100693          -0.046908   \n",
       "1          -0.014146          -0.012385          -0.068190   \n",
       "\n",
       "   客户编号_国民经济行业代码_w2v_0  客户编号_国民经济行业代码_w2v_1  客户编号_国民经济行业代码_w2v_2  \\\n",
       "0            -0.053017             0.082357            -0.062372   \n",
       "1            -0.020408             0.019504             0.009387   \n",
       "\n",
       "   客户编号_国民经济行业代码_w2v_3  客户编号_国民经济行业代码_w2v_4  客户编号_国民经济行业代码_w2v_5  \\\n",
       "0             0.074204            -0.009248            -0.066840   \n",
       "1             0.049944            -0.060627             0.037308   \n",
       "\n",
       "   客户编号_国民经济行业代码_w2v_6  客户编号_国民经济行业代码_w2v_7  企业机构类型_所在省份_tfidf_0  \\\n",
       "0            -0.041439             0.007692         2.365204e-17   \n",
       "1            -0.014331             0.110149        -9.652560e-20   \n",
       "\n",
       "   企业机构类型_所在省份_tfidf_1  企业机构类型_所在省份_tfidf_2  企业机构类型_所在省份_tfidf_3  \\\n",
       "0        -7.321048e-14         9.643517e-13        -2.287955e-10   \n",
       "1         1.421795e-16        -1.145915e-15         2.706533e-13   \n",
       "\n",
       "   企业机构类型_所在省份_tfidf_4  企业机构类型_所在省份_tfidf_5  企业机构类型_所在省份_tfidf_6  \\\n",
       "0         2.170271e-07         5.775978e-07        -2.031689e-06   \n",
       "1        -7.546618e-10        -1.734925e-09         1.169652e-08   \n",
       "\n",
       "   企业机构类型_所在省份_tfidf_7  企业机构类型_所在省份_tfidf_8  企业机构类型_所在省份_tfidf_9  \\\n",
       "0        -3.200340e-06             0.000001        -3.319528e-06   \n",
       "1         3.312179e-08             1.000000        -1.532140e-07   \n",
       "\n",
       "   企业机构类型_所在省份_countvec_0  企业机构类型_所在省份_countvec_1  企业机构类型_所在省份_countvec_2  \\\n",
       "0            2.365204e-17           -7.321048e-14            9.643517e-13   \n",
       "1           -9.652560e-20            1.421795e-16           -1.145915e-15   \n",
       "\n",
       "   企业机构类型_所在省份_countvec_3  企业机构类型_所在省份_countvec_4  企业机构类型_所在省份_countvec_5  \\\n",
       "0           -2.287955e-10            2.170271e-07            5.775978e-07   \n",
       "1            2.706533e-13           -7.546618e-10           -1.734925e-09   \n",
       "\n",
       "   企业机构类型_所在省份_countvec_6  企业机构类型_所在省份_countvec_7  企业机构类型_所在省份_countvec_8  \\\n",
       "0           -2.031689e-06           -3.200340e-06                0.000001   \n",
       "1            1.169652e-08            3.312179e-08                1.000000   \n",
       "\n",
       "   企业机构类型_所在省份_countvec_9  客户编号_企业机构类型_所在省份_w2v_0  客户编号_企业机构类型_所在省份_w2v_1  \\\n",
       "0           -3.319528e-06                0.018650                0.033680   \n",
       "1           -1.532140e-07               -0.022287               -0.043544   \n",
       "\n",
       "   客户编号_企业机构类型_所在省份_w2v_2  客户编号_企业机构类型_所在省份_w2v_3  客户编号_企业机构类型_所在省份_w2v_4  \\\n",
       "0                0.016343                0.013395               -0.102358   \n",
       "1                0.101384               -0.032385               -0.105914   \n",
       "\n",
       "   客户编号_企业机构类型_所在省份_w2v_5  客户编号_企业机构类型_所在省份_w2v_6  客户编号_企业机构类型_所在省份_w2v_7  \\\n",
       "0                0.014802                0.073728               -0.049012   \n",
       "1               -0.007611                0.073920               -0.077632   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_tfidf_0  企业机构类型_国民经济行业代码_tfidf_1  企业机构类型_国民经济行业代码_tfidf_2  \\\n",
       "0            -8.281430e-24             7.698298e-22             1.886886e-20   \n",
       "1            -4.236050e-30            -3.099192e-28             3.908434e-27   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_tfidf_3  企业机构类型_国民经济行业代码_tfidf_4  企业机构类型_国民经济行业代码_tfidf_5  \\\n",
       "0            -1.999001e-19             3.297910e-18             4.061767e-18   \n",
       "1             2.143316e-27             9.712234e-25             8.688776e-25   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_tfidf_6  企业机构类型_国民经济行业代码_tfidf_7  企业机构类型_国民经济行业代码_tfidf_8  \\\n",
       "0            -8.337480e-17             1.295573e-15             7.191130e-16   \n",
       "1             4.776915e-23            -1.601149e-23             2.631376e-22   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_tfidf_9  企业机构类型_国民经济行业代码_countvec_0  \\\n",
       "0             3.338126e-15               -8.281430e-24   \n",
       "1            -5.958599e-22               -4.236050e-30   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_countvec_1  企业机构类型_国民经济行业代码_countvec_2  \\\n",
       "0                7.698298e-22                1.886886e-20   \n",
       "1               -3.099192e-28                3.908434e-27   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_countvec_3  企业机构类型_国民经济行业代码_countvec_4  \\\n",
       "0               -1.999001e-19                3.297910e-18   \n",
       "1                2.143316e-27                9.712234e-25   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_countvec_5  企业机构类型_国民经济行业代码_countvec_6  \\\n",
       "0                4.061767e-18               -8.337480e-17   \n",
       "1                8.688776e-25                4.776915e-23   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_countvec_7  企业机构类型_国民经济行业代码_countvec_8  \\\n",
       "0                1.295573e-15                7.191130e-16   \n",
       "1               -1.601149e-23                2.631376e-22   \n",
       "\n",
       "   企业机构类型_国民经济行业代码_countvec_9  客户编号_企业机构类型_国民经济行业代码_w2v_0  \\\n",
       "0                3.338126e-15                   -0.027268   \n",
       "1               -5.958599e-22                   -0.026679   \n",
       "\n",
       "   客户编号_企业机构类型_国民经济行业代码_w2v_1  客户编号_企业机构类型_国民经济行业代码_w2v_2  \\\n",
       "0                    0.124259                   -0.113957   \n",
       "1                    0.009142                    0.081365   \n",
       "\n",
       "   客户编号_企业机构类型_国民经济行业代码_w2v_3  客户编号_企业机构类型_国民经济行业代码_w2v_4  \\\n",
       "0                    0.094800                    0.046124   \n",
       "1                   -0.016359                   -0.034656   \n",
       "\n",
       "   客户编号_企业机构类型_国民经济行业代码_w2v_5  客户编号_企业机构类型_国民经济行业代码_w2v_6  \\\n",
       "0                   -0.054023                    0.069497   \n",
       "1                   -0.092061                   -0.035071   \n",
       "\n",
       "   客户编号_企业机构类型_国民经济行业代码_w2v_7  所在省份_国民经济行业代码_tfidf_0  所在省份_国民经济行业代码_tfidf_1  \\\n",
       "0                    0.084224           8.658229e-27           2.917365e-22   \n",
       "1                   -0.093584           2.073874e-29           3.166712e-26   \n",
       "\n",
       "   所在省份_国民经济行业代码_tfidf_2  所在省份_国民经济行业代码_tfidf_3  所在省份_国民经济行业代码_tfidf_4  \\\n",
       "0           8.402205e-22          -2.632837e-19          -3.262675e-18   \n",
       "1           2.881067e-25           7.010090e-23           5.482755e-23   \n",
       "\n",
       "   所在省份_国民经济行业代码_tfidf_5  所在省份_国民经济行业代码_tfidf_6  所在省份_国民经济行业代码_tfidf_7  \\\n",
       "0          -5.255084e-18           7.503772e-18          -1.026334e-17   \n",
       "1          -4.520141e-22           5.630544e-22           7.432626e-23   \n",
       "\n",
       "   所在省份_国民经济行业代码_tfidf_8  所在省份_国民经济行业代码_tfidf_9  所在省份_国民经济行业代码_countvec_0  \\\n",
       "0           2.586156e-16           3.842865e-17              8.658229e-27   \n",
       "1          -4.792102e-21           4.181161e-21              2.073874e-29   \n",
       "\n",
       "   所在省份_国民经济行业代码_countvec_1  所在省份_国民经济行业代码_countvec_2  \\\n",
       "0              2.917365e-22              8.402205e-22   \n",
       "1              3.166712e-26              2.881067e-25   \n",
       "\n",
       "   所在省份_国民经济行业代码_countvec_3  所在省份_国民经济行业代码_countvec_4  \\\n",
       "0             -2.632837e-19             -3.262675e-18   \n",
       "1              7.010090e-23              5.482755e-23   \n",
       "\n",
       "   所在省份_国民经济行业代码_countvec_5  所在省份_国民经济行业代码_countvec_6  \\\n",
       "0             -5.255084e-18              7.503772e-18   \n",
       "1             -4.520141e-22              5.630544e-22   \n",
       "\n",
       "   所在省份_国民经济行业代码_countvec_7  所在省份_国民经济行业代码_countvec_8  \\\n",
       "0             -1.026334e-17              2.586156e-16   \n",
       "1              7.432626e-23             -4.792102e-21   \n",
       "\n",
       "   所在省份_国民经济行业代码_countvec_9  客户编号_所在省份_国民经济行业代码_w2v_0  \\\n",
       "0              3.842865e-17                 -0.014348   \n",
       "1              4.181161e-21                 -0.045867   \n",
       "\n",
       "   客户编号_所在省份_国民经济行业代码_w2v_1  客户编号_所在省份_国民经济行业代码_w2v_2  \\\n",
       "0                 -0.113956                 -0.072508   \n",
       "1                 -0.059138                 -0.083495   \n",
       "\n",
       "   客户编号_所在省份_国民经济行业代码_w2v_3  客户编号_所在省份_国民经济行业代码_w2v_4  \\\n",
       "0                 -0.024684                 -0.109418   \n",
       "1                  0.107400                  0.110016   \n",
       "\n",
       "   客户编号_所在省份_国民经济行业代码_w2v_5  客户编号_所在省份_国民经济行业代码_w2v_6  \\\n",
       "0                 -0.044016                 -0.007899   \n",
       "1                 -0.096203                 -0.036687   \n",
       "\n",
       "   客户编号_所在省份_国民经济行业代码_w2v_7  \n",
       "0                  0.112161  \n",
       "1                  0.015966  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the feature table returned by BASIC_text(), print its shape, then preview the first two rows.\n",
    "basic_text = BASIC_text()\n",
    "print(basic_text.shape)\n",
    "basic_text.head(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a1f930b4-2a3e-41a4-99df-1c71c14003be",
   "metadata": {},
   "source": [
    "# 交易明细业务特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "35655e81-bd28-489c-acd1-c0bf5f94edef",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:11:20.822662Z",
     "iopub.status.busy": "2024-11-11T03:11:20.821992Z",
     "iopub.status.idle": "2024-11-11T03:11:20.872092Z",
     "msg_id": "b0e1eaf8-520a-420f-bd1e-83c4ed6491ae",
     "shell.execute_reply": "2024-11-11T03:11:20.871296Z",
     "shell.execute_reply.started": "2024-11-11T03:11:20.822630Z"
    }
   },
   "outputs": [],
   "source": [
    "def FNCL_TR_DTAL_info():\n",
    "    \"\"\"Build per-customer business features from the transaction-detail table.\n",
    "\n",
    "    Loads XW_ENTINFO_FNCL_TR_DTAL through get_data, attaches each customer's\n",
    "    数据日期 from XW_ENTINFO_BASIC, and aggregates amounts, balances,\n",
    "    amount/balance ratios, counterparties and transaction/channel codes per\n",
    "    客户编号 over several slices: all rows, each 记账方向代码, each 同名账户标识,\n",
    "    rows <= 30 days and >= 60 days before 数据日期, each customer's last\n",
    "    transaction day, and the hard-coded low-activity dates.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with one row per row of TARGET['客户编号'] (features are\n",
    "        left-merged, so customers without matching transactions get NaN)\n",
    "        holding the customer id plus 77 feature columns.\n",
    "    \"\"\"\n",
    "    key = '客户编号'\n",
    "    amount = '折人民币交易金额'\n",
    "    balance = '合约账户余额'\n",
    "    ratio = '交易金额占比'\n",
    "    day_gap = '数据日期_距离_交易_天数'\n",
    "    # The two 记账方向代码 values the features are split on.\n",
    "    # NOTE(review): dir_a is labelled 转入 (transfer-in) in the ratio features but\n",
    "    # 流出 (outflow) in the amount features, and the reverse for dir_b. The original\n",
    "    # column names are kept unchanged here; confirm which reading is correct.\n",
    "    dir_a = '16459755d990723240edb88e34a13fab'\n",
    "    dir_b = '1250d7cb654a81c7b9366dabf57fe62b'\n",
    "\n",
    "    data = get_data(file_name='XW_ENTINFO_FNCL_TR_DTAL')\n",
    "    basic = get_data(file_name='XW_ENTINFO_BASIC')\n",
    "    data = data.merge(basic[['数据日期', key]], how='left', on=[key])\n",
    "\n",
    "    # Hard-coded low-activity dates (holidays / weekends) for the train and B-board periods.\n",
    "    low_day_train = [20020428, 20020429, 20020502, 20020505, 20020506, 20020512, 20020513, 20020519, 20020526, 20020527, 20020528, 20020529, 20020530, 20020603, 20020609, 20020610, 20020616, 20020617, 20020623, 20020624, 20020630, 20020701, 20020707, 20020708, 20020714, 20020715, 20020719, 20020720, 20020721]\n",
    "    low_day_B = [20020921, 20020920, 20020919, 20020915, 20020914, 20020908, 20020907, 20020831, 20020901, 20020825, 20020824, 20020817, 20020818, 20020811, 20020810, 20020804, 20020731, 20020730, 20020729, 20020728, 20020727, 20020720, 20020714, 20020713, 20020707, 20020706, 20020703,20020629, 20020630]\n",
    "    # Explicit copies so the flag is written to independent frames rather than to\n",
    "    # slices of `data` (avoids SettingWithCopyWarning); isin replaces the per-row lambda.\n",
    "    train_part = data[data['is_train'] == 1].copy()\n",
    "    b_part = data[data['is_train'] == 0].copy()\n",
    "    train_part['是否节假日或周末'] = train_part['交易日期'].isin(low_day_train).astype(int)\n",
    "    b_part['是否节假日或周末'] = b_part['交易日期'].isin(low_day_B).astype(int)\n",
    "    data = pd.concat([train_part, b_part])\n",
    "\n",
    "    data['交易日期'] = data['交易日期'].astype('str').astype('datetime64[ns]')\n",
    "    data['数据日期'] = data['数据日期'].astype('str').astype('datetime64[ns]')\n",
    "    # Vectorised day difference instead of a row-wise DataFrame.apply.\n",
    "    data[day_gap] = (data['数据日期'] - data['交易日期']).dt.days\n",
    "    data[ratio] = data[amount] / (data[balance] + 0.00001)\n",
    "\n",
    "    is_dir_a = data['记账方向代码'] == dir_a\n",
    "    is_dir_b = data['记账方向代码'] == dir_b\n",
    "    last_month = data[day_gap] <= 30\n",
    "    third_month = data[day_gap] >= 60\n",
    "\n",
    "    def attach(features, frame, spec, names):\n",
    "        # Aggregate `frame` per customer, flatten the result to plain column names,\n",
    "        # then left-merge onto `features`. Flattening first avoids merging a\n",
    "        # MultiIndex-column frame into a flat one.\n",
    "        stats = frame.groupby(key).agg(spec).reset_index()\n",
    "        stats.columns = [key] + names\n",
    "        return features.merge(stats, how='left', on=key)\n",
    "\n",
    "    def attach_flows(features, label, mask):\n",
    "        # Amount sum/count per direction within `mask`, then net, total amount and total count.\n",
    "        for direction, side in ((is_dir_a, '流出'), (is_dir_b, '流入')):\n",
    "            features = attach(features, data[mask & direction], {amount: ['sum', 'count']},\n",
    "                              [label + side + '金额', label + side + '笔数'])\n",
    "        features[label + '总净流'] = features[label + '流入金额'] - features[label + '流出金额']\n",
    "        features[label + '总金额'] = features[label + '流入金额'] + features[label + '流出金额']\n",
    "        features[label + '总笔数'] = features[label + '流入笔数'] + features[label + '流出笔数']\n",
    "        return features\n",
    "\n",
    "    data_agg = pd.DataFrame(TARGET[key])\n",
    "\n",
    "    # Amount / balance ratio: mean, max, min over five slices.\n",
    "    ratio_stats = ['mean', 'max', 'min']\n",
    "    ratio_slices = [\n",
    "        ('总交易金额占比', data),\n",
    "        ('转入金额占比', data[is_dir_a]),\n",
    "        ('转出金额占比', data[is_dir_b]),\n",
    "        ('本人金额占比', data[data['同名账户标识'] == 1]),\n",
    "        ('非本人金额占比', data[data['同名账户标识'] == 0]),\n",
    "    ]\n",
    "    for prefix, subset in ratio_slices:\n",
    "        data_agg = attach(data_agg, subset, {ratio: ratio_stats},\n",
    "                          [prefix + '_' + stat for stat in ratio_stats])\n",
    "\n",
    "    # Low-activity flag: at most 5 transactions (NaN counts compare False, giving 0).\n",
    "    data_agg = attach(data_agg, data, {amount: 'count'}, ['总交易次数'])\n",
    "    data_agg['交易次数小于等于5'] = np.where((data_agg['总交易次数'] <= 5), 1, 0)\n",
    "    data_agg = data_agg.drop(['总交易次数'], axis=1)\n",
    "\n",
    "    # Overall flows per direction.\n",
    "    flow_stats = ['sum', 'count', 'mean', 'std', 'max']\n",
    "    data_agg = attach(data_agg, data[is_dir_a], {amount: flow_stats},\n",
    "                      ['总流出金额', '总流出笔数', '流出平均金额', '流出金额方差', '流出金额最大值'])\n",
    "    data_agg = attach(data_agg, data[is_dir_b], {amount: flow_stats},\n",
    "                      ['总流入金额', '总流入笔数', '流入平均金额', '流入金额方差', '流入金额最大值'])\n",
    "    data_agg['总净流'] = data_agg['总流入金额'] - data_agg['总流出金额']\n",
    "    data_agg['总金额'] = data_agg['总流入金额'] + data_agg['总流出金额']\n",
    "    data_agg['总笔数'] = data_agg['总流入笔数'] + data_agg['总流出笔数']\n",
    "\n",
    "    # Flows in the last month (<= 30 days) and the third-to-last month (>= 60 days).\n",
    "    data_agg = attach_flows(data_agg, '近一月', last_month)\n",
    "    data_agg = attach_flows(data_agg, '倒数第三月', third_month)\n",
    "\n",
    "    # Trend: last-month value minus third-to-last-month value.\n",
    "    for measure in ('流入金额', '流出金额', '总金额', '流出笔数', '流入笔数', '总笔数'):\n",
    "        data_agg['第三个月与第一个月' + measure + '差'] = data_agg['近一月' + measure] - data_agg['倒数第三月' + measure]\n",
    "\n",
    "    # Number of distinct counterparties.\n",
    "    data_agg = attach(data_agg, data, {'交易对手客户编号': ['nunique']}, ['相关客户数'])\n",
    "\n",
    "    # Flows on each customer's most recent transaction day.\n",
    "    on_last_day = data['交易日期'] == data.groupby(key)['交易日期'].transform('max')\n",
    "    data_agg = attach_flows(data_agg, '最后交易日', on_last_day)\n",
    "\n",
    "    # Transactions on the flagged holiday / weekend dates.\n",
    "    data_agg = attach(data_agg, data[data['是否节假日或周末'] == 1], {amount: ['sum', 'count']},\n",
    "                      ['非工作日交易金额', '非工作日交易笔数'])\n",
    "\n",
    "    # Whole-period amount statistics (total amount and count are already covered above).\n",
    "    data_agg = attach(data_agg, data, {amount: ['max', 'min', 'mean', 'std']},\n",
    "                      ['企业交易绝对值最高金额', '企业交易绝对值最低金额', '企业交易绝对值_mean', '企业交易绝对值_std'])\n",
    "\n",
    "    # Account balance in the last and third-to-last month, and their differences.\n",
    "    balance_stats = ['mean', 'max', 'std']\n",
    "    data_agg = attach(data_agg, data[last_month], {balance: balance_stats},\n",
    "                      ['近一月平均账户余额', '近一月最大账户余额', '近一月账户余额方差'])\n",
    "    data_agg = attach(data_agg, data[third_month], {balance: balance_stats},\n",
    "                      ['倒数第三个月平均账户余额', '倒数第三个月最大账户余额', '倒数第三个月账户余额方差'])\n",
    "    data_agg['第三个月与第一个月余额均值差'] = data_agg['近一月平均账户余额'] - data_agg['倒数第三个月平均账户余额']\n",
    "    data_agg['第三个月与第一个月余额最大差'] = data_agg['近一月最大账户余额'] - data_agg['倒数第三个月最大账户余额']\n",
    "\n",
    "    # Distinct transaction / channel codes in the last and third-to-last month, and their differences.\n",
    "    code_spec = {'交易代码': ['nunique'], '渠道代码': ['nunique']}\n",
    "    data_agg = attach(data_agg, data[last_month], code_spec, ['近一月交易代码个数', '近一月渠道代码个数'])\n",
    "    data_agg = attach(data_agg, data[third_month], code_spec, ['倒数第三月交易代码个数', '倒数第三月渠道代码个数'])\n",
    "    data_agg['第三个月与第一个月渠道数差'] = data_agg['近一月渠道代码个数'] - data_agg['倒数第三月渠道代码个数']\n",
    "    data_agg['第三个月与第一个月交易代码数差'] = data_agg['近一月交易代码个数'] - data_agg['倒数第三月交易代码个数']\n",
    "\n",
    "    return data_agg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "f1bbd00d-86e5-453b-b85a-3531a61ccbc4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:11:23.147322Z",
     "iopub.status.busy": "2024-11-11T03:11:23.146868Z",
     "iopub.status.idle": "2024-11-11T03:17:05.238297Z",
     "msg_id": "65658956-7258-43d6-a163-8bda35189a2f",
     "shell.execute_reply": "2024-11-11T03:17:05.237581Z",
     "shell.execute_reply.started": "2024-11-11T03:11:23.147291Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 78)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>总交易金额占比_mean</th>\n",
       "      <th>总交易金额占比_max</th>\n",
       "      <th>总交易金额占比_min</th>\n",
       "      <th>转入金额占比_mean</th>\n",
       "      <th>转入金额占比_max</th>\n",
       "      <th>转入金额占比_min</th>\n",
       "      <th>转出金额占比_mean</th>\n",
       "      <th>转出金额占比_max</th>\n",
       "      <th>转出金额占比_min</th>\n",
       "      <th>本人金额占比_mean</th>\n",
       "      <th>本人金额占比_max</th>\n",
       "      <th>本人金额占比_min</th>\n",
       "      <th>非本人金额占比_mean</th>\n",
       "      <th>非本人金额占比_max</th>\n",
       "      <th>非本人金额占比_min</th>\n",
       "      <th>交易次数小于等于5</th>\n",
       "      <th>总流出金额</th>\n",
       "      <th>总流出笔数</th>\n",
       "      <th>流出平均金额</th>\n",
       "      <th>流出金额方差</th>\n",
       "      <th>流出金额最大值</th>\n",
       "      <th>总流入金额</th>\n",
       "      <th>总流入笔数</th>\n",
       "      <th>流入平均金额</th>\n",
       "      <th>流入金额方差</th>\n",
       "      <th>流入金额最大值</th>\n",
       "      <th>总净流</th>\n",
       "      <th>总金额</th>\n",
       "      <th>总笔数</th>\n",
       "      <th>近一月流出金额</th>\n",
       "      <th>近一月流出笔数</th>\n",
       "      <th>近一月流入金额</th>\n",
       "      <th>近一月流入笔数</th>\n",
       "      <th>近一月总净流</th>\n",
       "      <th>近一月总金额</th>\n",
       "      <th>近一月总笔数</th>\n",
       "      <th>倒数第三月流出金额</th>\n",
       "      <th>倒数第三月流出笔数</th>\n",
       "      <th>倒数第三月流入金额</th>\n",
       "      <th>倒数第三月流入笔数</th>\n",
       "      <th>倒数第三月总净流</th>\n",
       "      <th>倒数第三月总金额</th>\n",
       "      <th>倒数第三月总笔数</th>\n",
       "      <th>第三个月与第一个月流入金额差</th>\n",
       "      <th>第三个月与第一个月流出金额差</th>\n",
       "      <th>第三个月与第一个月总金额差</th>\n",
       "      <th>第三个月与第一个月流出笔数差</th>\n",
       "      <th>第三个月与第一个月流入笔数差</th>\n",
       "      <th>第三个月与第一个月总笔数差</th>\n",
       "      <th>相关客户数</th>\n",
       "      <th>最后交易日流出金额</th>\n",
       "      <th>最后交易日流出笔数</th>\n",
       "      <th>最后交易日流入金额</th>\n",
       "      <th>最后交易日流入笔数</th>\n",
       "      <th>最后交易日总净流</th>\n",
       "      <th>最后交易日总金额</th>\n",
       "      <th>最后交易日总笔数</th>\n",
       "      <th>非工作日交易金额</th>\n",
       "      <th>非工作日交易笔数</th>\n",
       "      <th>企业交易绝对值最高金额</th>\n",
       "      <th>企业交易绝对值最低金额</th>\n",
       "      <th>企业交易绝对值_mean</th>\n",
       "      <th>企业交易绝对值_std</th>\n",
       "      <th>近一月平均账户余额</th>\n",
       "      <th>近一月最大账户余额</th>\n",
       "      <th>近一月账户余额方差</th>\n",
       "      <th>倒数第三个月平均账户余额</th>\n",
       "      <th>倒数第三个月最大账户余额</th>\n",
       "      <th>倒数第三个月账户余额方差</th>\n",
       "      <th>第三个月与第一个月余额均值差</th>\n",
       "      <th>第三个月与第一个月余额最大差</th>\n",
       "      <th>近一月交易代码个数</th>\n",
       "      <th>近一月渠道代码个数</th>\n",
       "      <th>倒数第三月交易代码个数</th>\n",
       "      <th>倒数第三月渠道代码个数</th>\n",
       "      <th>第三个月与第一个月渠道数差</th>\n",
       "      <th>第三个月与第一个月交易代码数差</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>141147.721138</td>\n",
       "      <td>1740000.0</td>\n",
       "      <td>0.014808</td>\n",
       "      <td>161000.670796</td>\n",
       "      <td>1183000.0</td>\n",
       "      <td>0.014808</td>\n",
       "      <td>116000.651573</td>\n",
       "      <td>1740000.0</td>\n",
       "      <td>0.147927</td>\n",
       "      <td>1.629809</td>\n",
       "      <td>2.799176</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0</td>\n",
       "      <td>583.82</td>\n",
       "      <td>19.0</td>\n",
       "      <td>30.727368</td>\n",
       "      <td>38.514283</td>\n",
       "      <td>106.75</td>\n",
       "      <td>418.88</td>\n",
       "      <td>15.0</td>\n",
       "      <td>27.925333</td>\n",
       "      <td>36.134326</td>\n",
       "      <td>106.75</td>\n",
       "      <td>-164.94</td>\n",
       "      <td>1002.7</td>\n",
       "      <td>34.0</td>\n",
       "      <td>130.15</td>\n",
       "      <td>5.0</td>\n",
       "      <td>78.81</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-51.34</td>\n",
       "      <td>208.96</td>\n",
       "      <td>7.0</td>\n",
       "      <td>388.28</td>\n",
       "      <td>9.0</td>\n",
       "      <td>213.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-174.78</td>\n",
       "      <td>601.78</td>\n",
       "      <td>11.0</td>\n",
       "      <td>-134.69</td>\n",
       "      <td>-258.13</td>\n",
       "      <td>-392.82</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.32</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.96</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-15.36</td>\n",
       "      <td>23.28</td>\n",
       "      <td>3.0</td>\n",
       "      <td>69.6</td>\n",
       "      <td>4.0</td>\n",
       "      <td>106.75</td>\n",
       "      <td>1.24</td>\n",
       "      <td>29.491176</td>\n",
       "      <td>36.946221</td>\n",
       "      <td>30.41</td>\n",
       "      <td>75.98</td>\n",
       "      <td>21.636022</td>\n",
       "      <td>69.550909</td>\n",
       "      <td>106.75</td>\n",
       "      <td>26.308696</td>\n",
       "      <td>-39.140909</td>\n",
       "      <td>-30.77</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号   总交易金额占比_mean  总交易金额占比_max  总交易金额占比_min  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9  141147.721138    1740000.0     0.014808   \n",
       "\n",
       "     转入金额占比_mean  转入金额占比_max  转入金额占比_min    转出金额占比_mean  转出金额占比_max  \\\n",
       "0  161000.670796   1183000.0    0.014808  116000.651573   1740000.0   \n",
       "\n",
       "   转出金额占比_min  本人金额占比_mean  本人金额占比_max  本人金额占比_min  非本人金额占比_mean  非本人金额占比_max  \\\n",
       "0    0.147927     1.629809    2.799176         1.0      0.985128     0.985128   \n",
       "\n",
       "   非本人金额占比_min  交易次数小于等于5   总流出金额  总流出笔数     流出平均金额     流出金额方差  流出金额最大值  \\\n",
       "0     0.985128          0  583.82   19.0  30.727368  38.514283   106.75   \n",
       "\n",
       "    总流入金额  总流入笔数     流入平均金额     流入金额方差  流入金额最大值     总净流     总金额   总笔数  \\\n",
       "0  418.88   15.0  27.925333  36.134326   106.75 -164.94  1002.7  34.0   \n",
       "\n",
       "   近一月流出金额  近一月流出笔数  近一月流入金额  近一月流入笔数  近一月总净流  近一月总金额  近一月总笔数  倒数第三月流出金额  \\\n",
       "0   130.15      5.0    78.81      2.0  -51.34  208.96     7.0     388.28   \n",
       "\n",
       "   倒数第三月流出笔数  倒数第三月流入金额  倒数第三月流入笔数  倒数第三月总净流  倒数第三月总金额  倒数第三月总笔数  \\\n",
       "0        9.0      213.5        2.0   -174.78    601.78      11.0   \n",
       "\n",
       "   第三个月与第一个月流入金额差  第三个月与第一个月流出金额差  第三个月与第一个月总金额差  第三个月与第一个月流出笔数差  \\\n",
       "0         -134.69         -258.13        -392.82            -4.0   \n",
       "\n",
       "   第三个月与第一个月流入笔数差  第三个月与第一个月总笔数差  相关客户数  最后交易日流出金额  最后交易日流出笔数  最后交易日流入金额  \\\n",
       "0             0.0           -4.0    2.0      19.32        2.0       3.96   \n",
       "\n",
       "   最后交易日流入笔数  最后交易日总净流  最后交易日总金额  最后交易日总笔数  非工作日交易金额  非工作日交易笔数  企业交易绝对值最高金额  \\\n",
       "0        1.0    -15.36     23.28       3.0      69.6       4.0       106.75   \n",
       "\n",
       "   企业交易绝对值最低金额  企业交易绝对值_mean  企业交易绝对值_std  近一月平均账户余额  近一月最大账户余额  近一月账户余额方差  \\\n",
       "0         1.24     29.491176    36.946221      30.41      75.98  21.636022   \n",
       "\n",
       "   倒数第三个月平均账户余额  倒数第三个月最大账户余额  倒数第三个月账户余额方差  第三个月与第一个月余额均值差  第三个月与第一个月余额最大差  \\\n",
       "0     69.550909        106.75     26.308696      -39.140909          -30.77   \n",
       "\n",
       "   近一月交易代码个数  近一月渠道代码个数  倒数第三月交易代码个数  倒数第三月渠道代码个数  第三个月与第一个月渠道数差  \\\n",
       "0        6.0        4.0          5.0          4.0            0.0   \n",
       "\n",
       "   第三个月与第一个月交易代码数差  \n",
       "0              1.0  "
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run FNCL_TR_DTAL_info(), print the resulting shape, then preview the first row.\n",
    "fncl_tr_dtal_info = FNCL_TR_DTAL_info()\n",
    "print(fncl_tr_dtal_info.shape)\n",
    "fncl_tr_dtal_info.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "4cebb5ff-0c97-446c-ae8d-6a74146b4e67",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:31:38.202150Z",
     "iopub.status.busy": "2024-11-08T03:31:38.201754Z",
     "iopub.status.idle": "2024-11-08T03:31:38.217997Z",
     "msg_id": "fda7f59a-6535-4f5a-acde-405b21655b4c",
     "shell.execute_reply": "2024-11-08T03:31:38.217208Z",
     "shell.execute_reply.started": "2024-11-08T03:31:38.202121Z"
    }
   },
   "outputs": [],
   "source": [
    "def FNCL_TR_DTAL_info_2():\n",
    "    \"\"\"Aggregate per-customer statistics over the low-frequency transaction days.\n",
    "\n",
    "    Loads XW_ENTINFO_FNCL_TR_DTAL through get_data, attaches each customer's\n",
    "    数据日期 from XW_ENTINFO_BASIC, keeps only transactions whose distance in days\n",
    "    to 数据日期 is one of the hard-coded offsets below, and aggregates them per\n",
    "    客户编号 with base_func.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with one row per row of TARGET['客户编号'] (left-merged, so\n",
    "        customers without such transactions get NaN) and 14 feature columns\n",
    "        suffixed with '_低频交易日'.\n",
    "    \"\"\"\n",
    "    key = '客户编号'\n",
    "    day_gap = '数据日期_距离_交易_天数'\n",
    "\n",
    "    data = get_data(file_name='XW_ENTINFO_FNCL_TR_DTAL')\n",
    "    basic = get_data(file_name='XW_ENTINFO_BASIC')\n",
    "    data = data.merge(basic[['数据日期', key]], how='left', on=[key])\n",
    "\n",
    "    # Keep train rows first, then B-board rows (rows with any other is_train value are\n",
    "    # dropped), exactly as before. The holiday/weekend flag that used to be computed\n",
    "    # here was never read by this function, so it is no longer built.\n",
    "    data = pd.concat([data[data['is_train'] == 1], data[data['is_train'] == 0]])\n",
    "\n",
    "    data['交易日期'] = data['交易日期'].astype('str').astype('datetime64[ns]')\n",
    "    data['数据日期'] = data['数据日期'].astype('str').astype('datetime64[ns]')\n",
    "    # Vectorised day difference instead of a row-wise DataFrame.apply.\n",
    "    data[day_gap] = (data['数据日期'] - data['交易日期']).dt.days\n",
    "\n",
    "    data_agg = pd.DataFrame(TARGET[key])\n",
    "\n",
    "    base_func = {\n",
    "        '交易代码': 'count',\n",
    "        '渠道代码': 'count',\n",
    "        '合约账户余额': ['mean', 'min', 'max', 'std'],\n",
    "        '折人民币交易金额': ['count', 'sum', 'max', 'std', 'skew', count_notzero, count_zero],\n",
    "        '交易对手客户编号': 'nunique'\n",
    "    }\n",
    "    # Flat names for the aggregated columns, in the order base_func produces them.\n",
    "    columns = ['交易代码_count', '渠道代码_count', '合约账户余额_mean', '合约账户余额_min', '合约账户余额_max', '合约账户余额_std', '折人民币交易金额_count', '折人民币交易金额_sum', \n",
    "                '折人民币交易金额_max', '折人民币交易金额_std', '折人民币交易金额_skew', '折人民币交易金额_count_notzero', '折人民币交易金额_count_zero', '交易对手客户编号_nunique']  \n",
    "\n",
    "    # Day offsets (days before 数据日期) treated as low-frequency transaction days.\n",
    "    low_datediff = [6, 7, 8, 12, 13, 19, 20, 26, 27, 33, 34, 41, 47, 48, 54, 57, 58, 59, 60, 61, 67, 68, 74, 75, 81, 82, 85, 88, 89]\n",
    "    # One concat over all offsets (same row order as before) instead of growing a\n",
    "    # frame with pd.concat inside the loop, which re-copies the data on every pass.\n",
    "    low_freq = pd.concat([data[data[day_gap] == gap] for gap in low_datediff])\n",
    "\n",
    "    merge = low_freq.groupby(key).agg(base_func).reset_index()\n",
    "    merge.columns = [key] + [column + '_低频交易日' for column in columns]\n",
    "    data_agg = data_agg.merge(merge, how='left', on=key)\n",
    "\n",
    "    return data_agg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "af7b8fdb-7b4a-42cc-806d-def2dd1d7d53",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:31:38.219472Z",
     "iopub.status.busy": "2024-11-08T03:31:38.218955Z",
     "iopub.status.idle": "2024-11-08T03:36:30.411424Z",
     "msg_id": "1b5034b6-2e40-42ce-8886-7e4bba7ec9f1",
     "shell.execute_reply": "2024-11-08T03:36:30.410654Z",
     "shell.execute_reply.started": "2024-11-08T03:31:38.219445Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 15)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>交易代码_count_低频交易日</th>\n",
       "      <th>渠道代码_count_低频交易日</th>\n",
       "      <th>合约账户余额_mean_低频交易日</th>\n",
       "      <th>合约账户余额_min_低频交易日</th>\n",
       "      <th>合约账户余额_max_低频交易日</th>\n",
       "      <th>合约账户余额_std_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_低频交易日</th>\n",
       "      <th>折人民币交易金额_sum_低频交易日</th>\n",
       "      <th>折人民币交易金额_max_低频交易日</th>\n",
       "      <th>折人民币交易金额_std_低频交易日</th>\n",
       "      <th>折人民币交易金额_skew_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_notzero_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_zero_低频交易日</th>\n",
       "      <th>交易对手客户编号_nunique_低频交易日</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>86.568571</td>\n",
       "      <td>70.63</td>\n",
       "      <td>106.75</td>\n",
       "      <td>14.981426</td>\n",
       "      <td>7.0</td>\n",
       "      <td>515.21</td>\n",
       "      <td>106.75</td>\n",
       "      <td>50.013348</td>\n",
       "      <td>-1.137199</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号  交易代码_count_低频交易日  渠道代码_count_低频交易日  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9               7.0               7.0   \n",
       "\n",
       "   合约账户余额_mean_低频交易日  合约账户余额_min_低频交易日  合约账户余额_max_低频交易日  合约账户余额_std_低频交易日  \\\n",
       "0          86.568571             70.63            106.75         14.981426   \n",
       "\n",
       "   折人民币交易金额_count_低频交易日  折人民币交易金额_sum_低频交易日  折人民币交易金额_max_低频交易日  \\\n",
       "0                   7.0              515.21              106.75   \n",
       "\n",
       "   折人民币交易金额_std_低频交易日  折人民币交易金额_skew_低频交易日  折人民币交易金额_count_notzero_低频交易日  \\\n",
       "0           50.013348            -1.137199                           7.0   \n",
       "\n",
       "   折人民币交易金额_count_zero_低频交易日  交易对手客户编号_nunique_低频交易日  \n",
       "0                        0.0                     2.0  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the per-customer aggregates over the low-frequency trading days (column suffix _低频交易日);\n",
    "# the recorded run of this cell took about five minutes and printed a shape of (59116, 15).\n",
    "fncl_tr_dtal_info_2 = FNCL_TR_DTAL_info_2()\n",
    "print(fncl_tr_dtal_info_2.shape)\n",
    "fncl_tr_dtal_info_2.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "220d2caa-7d8c-42dd-b13f-0fab53acac8a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:18:30.505824Z",
     "iopub.status.busy": "2024-11-11T03:18:30.505368Z",
     "iopub.status.idle": "2024-11-11T03:18:30.674266Z",
     "msg_id": "4556a456-1802-4c44-ac59-515851b05bef",
     "shell.execute_reply": "2024-11-11T03:18:30.673387Z",
     "shell.execute_reply.started": "2024-11-11T03:18:30.505793Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>总交易金额占比_mean</th>\n",
       "      <th>总交易金额占比_max</th>\n",
       "      <th>总交易金额占比_min</th>\n",
       "      <th>转入金额占比_mean</th>\n",
       "      <th>转入金额占比_max</th>\n",
       "      <th>转入金额占比_min</th>\n",
       "      <th>转出金额占比_mean</th>\n",
       "      <th>转出金额占比_max</th>\n",
       "      <th>转出金额占比_min</th>\n",
       "      <th>本人金额占比_mean</th>\n",
       "      <th>本人金额占比_max</th>\n",
       "      <th>本人金额占比_min</th>\n",
       "      <th>非本人金额占比_mean</th>\n",
       "      <th>非本人金额占比_max</th>\n",
       "      <th>非本人金额占比_min</th>\n",
       "      <th>交易次数小于等于5</th>\n",
       "      <th>总流出金额</th>\n",
       "      <th>总流出笔数</th>\n",
       "      <th>流出平均金额</th>\n",
       "      <th>流出金额方差</th>\n",
       "      <th>流出金额最大值</th>\n",
       "      <th>总流入金额</th>\n",
       "      <th>总流入笔数</th>\n",
       "      <th>流入平均金额</th>\n",
       "      <th>流入金额方差</th>\n",
       "      <th>流入金额最大值</th>\n",
       "      <th>总净流</th>\n",
       "      <th>总金额</th>\n",
       "      <th>总笔数</th>\n",
       "      <th>近一月流出金额</th>\n",
       "      <th>近一月流出笔数</th>\n",
       "      <th>近一月流入金额</th>\n",
       "      <th>近一月流入笔数</th>\n",
       "      <th>近一月总净流</th>\n",
       "      <th>近一月总金额</th>\n",
       "      <th>近一月总笔数</th>\n",
       "      <th>倒数第三月流出金额</th>\n",
       "      <th>倒数第三月流出笔数</th>\n",
       "      <th>倒数第三月流入金额</th>\n",
       "      <th>倒数第三月流入笔数</th>\n",
       "      <th>倒数第三月总净流</th>\n",
       "      <th>倒数第三月总金额</th>\n",
       "      <th>倒数第三月总笔数</th>\n",
       "      <th>第三个月与第一个月流入金额差</th>\n",
       "      <th>第三个月与第一个月流出金额差</th>\n",
       "      <th>第三个月与第一个月总金额差</th>\n",
       "      <th>第三个月与第一个月流出笔数差</th>\n",
       "      <th>第三个月与第一个月流入笔数差</th>\n",
       "      <th>第三个月与第一个月总笔数差</th>\n",
       "      <th>相关客户数</th>\n",
       "      <th>最后交易日流出金额</th>\n",
       "      <th>最后交易日流出笔数</th>\n",
       "      <th>最后交易日流入金额</th>\n",
       "      <th>最后交易日流入笔数</th>\n",
       "      <th>最后交易日总净流</th>\n",
       "      <th>最后交易日总金额</th>\n",
       "      <th>最后交易日总笔数</th>\n",
       "      <th>非工作日交易金额</th>\n",
       "      <th>非工作日交易笔数</th>\n",
       "      <th>企业交易绝对值最高金额</th>\n",
       "      <th>企业交易绝对值最低金额</th>\n",
       "      <th>企业交易绝对值_mean</th>\n",
       "      <th>企业交易绝对值_std</th>\n",
       "      <th>近一月平均账户余额</th>\n",
       "      <th>近一月最大账户余额</th>\n",
       "      <th>近一月账户余额方差</th>\n",
       "      <th>倒数第三个月平均账户余额</th>\n",
       "      <th>倒数第三个月最大账户余额</th>\n",
       "      <th>倒数第三个月账户余额方差</th>\n",
       "      <th>第三个月与第一个月余额均值差</th>\n",
       "      <th>第三个月与第一个月余额最大差</th>\n",
       "      <th>近一月交易代码个数</th>\n",
       "      <th>近一月渠道代码个数</th>\n",
       "      <th>倒数第三月交易代码个数</th>\n",
       "      <th>倒数第三月渠道代码个数</th>\n",
       "      <th>第三个月与第一个月渠道数差</th>\n",
       "      <th>第三个月与第一个月交易代码数差</th>\n",
       "      <th>交易代码_count_低频交易日</th>\n",
       "      <th>渠道代码_count_低频交易日</th>\n",
       "      <th>合约账户余额_mean_低频交易日</th>\n",
       "      <th>合约账户余额_min_低频交易日</th>\n",
       "      <th>合约账户余额_max_低频交易日</th>\n",
       "      <th>合约账户余额_std_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_低频交易日</th>\n",
       "      <th>折人民币交易金额_sum_低频交易日</th>\n",
       "      <th>折人民币交易金额_max_低频交易日</th>\n",
       "      <th>折人民币交易金额_std_低频交易日</th>\n",
       "      <th>折人民币交易金额_skew_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_notzero_低频交易日</th>\n",
       "      <th>折人民币交易金额_count_zero_低频交易日</th>\n",
       "      <th>交易对手客户编号_nunique_低频交易日</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>141147.721138</td>\n",
       "      <td>1740000.0</td>\n",
       "      <td>0.014808</td>\n",
       "      <td>161000.670796</td>\n",
       "      <td>1183000.0</td>\n",
       "      <td>0.014808</td>\n",
       "      <td>116000.651573</td>\n",
       "      <td>1740000.0</td>\n",
       "      <td>0.147927</td>\n",
       "      <td>1.629809</td>\n",
       "      <td>2.799176</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0.985128</td>\n",
       "      <td>0</td>\n",
       "      <td>583.82</td>\n",
       "      <td>19.0</td>\n",
       "      <td>30.727368</td>\n",
       "      <td>38.514283</td>\n",
       "      <td>106.75</td>\n",
       "      <td>418.88</td>\n",
       "      <td>15.0</td>\n",
       "      <td>27.925333</td>\n",
       "      <td>36.134326</td>\n",
       "      <td>106.75</td>\n",
       "      <td>-164.94</td>\n",
       "      <td>1002.7</td>\n",
       "      <td>34.0</td>\n",
       "      <td>130.15</td>\n",
       "      <td>5.0</td>\n",
       "      <td>78.81</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-51.34</td>\n",
       "      <td>208.96</td>\n",
       "      <td>7.0</td>\n",
       "      <td>388.28</td>\n",
       "      <td>9.0</td>\n",
       "      <td>213.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>-174.78</td>\n",
       "      <td>601.78</td>\n",
       "      <td>11.0</td>\n",
       "      <td>-134.69</td>\n",
       "      <td>-258.13</td>\n",
       "      <td>-392.82</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>-4.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.32</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.96</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-15.36</td>\n",
       "      <td>23.28</td>\n",
       "      <td>3.0</td>\n",
       "      <td>69.6</td>\n",
       "      <td>4.0</td>\n",
       "      <td>106.75</td>\n",
       "      <td>1.24</td>\n",
       "      <td>29.491176</td>\n",
       "      <td>36.946221</td>\n",
       "      <td>30.41</td>\n",
       "      <td>75.98</td>\n",
       "      <td>21.636022</td>\n",
       "      <td>69.550909</td>\n",
       "      <td>106.75</td>\n",
       "      <td>26.308696</td>\n",
       "      <td>-39.140909</td>\n",
       "      <td>-30.77</td>\n",
       "      <td>6.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>86.568571</td>\n",
       "      <td>70.63</td>\n",
       "      <td>106.75</td>\n",
       "      <td>14.981426</td>\n",
       "      <td>7.0</td>\n",
       "      <td>515.21</td>\n",
       "      <td>106.75</td>\n",
       "      <td>50.013348</td>\n",
       "      <td>-1.137199</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号   总交易金额占比_mean  总交易金额占比_max  总交易金额占比_min  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9  141147.721138    1740000.0     0.014808   \n",
       "\n",
       "     转入金额占比_mean  转入金额占比_max  转入金额占比_min    转出金额占比_mean  转出金额占比_max  \\\n",
       "0  161000.670796   1183000.0    0.014808  116000.651573   1740000.0   \n",
       "\n",
       "   转出金额占比_min  本人金额占比_mean  本人金额占比_max  本人金额占比_min  非本人金额占比_mean  非本人金额占比_max  \\\n",
       "0    0.147927     1.629809    2.799176         1.0      0.985128     0.985128   \n",
       "\n",
       "   非本人金额占比_min  交易次数小于等于5   总流出金额  总流出笔数     流出平均金额     流出金额方差  流出金额最大值  \\\n",
       "0     0.985128          0  583.82   19.0  30.727368  38.514283   106.75   \n",
       "\n",
       "    总流入金额  总流入笔数     流入平均金额     流入金额方差  流入金额最大值     总净流     总金额   总笔数  \\\n",
       "0  418.88   15.0  27.925333  36.134326   106.75 -164.94  1002.7  34.0   \n",
       "\n",
       "   近一月流出金额  近一月流出笔数  近一月流入金额  近一月流入笔数  近一月总净流  近一月总金额  近一月总笔数  倒数第三月流出金额  \\\n",
       "0   130.15      5.0    78.81      2.0  -51.34  208.96     7.0     388.28   \n",
       "\n",
       "   倒数第三月流出笔数  倒数第三月流入金额  倒数第三月流入笔数  倒数第三月总净流  倒数第三月总金额  倒数第三月总笔数  \\\n",
       "0        9.0      213.5        2.0   -174.78    601.78      11.0   \n",
       "\n",
       "   第三个月与第一个月流入金额差  第三个月与第一个月流出金额差  第三个月与第一个月总金额差  第三个月与第一个月流出笔数差  \\\n",
       "0         -134.69         -258.13        -392.82            -4.0   \n",
       "\n",
       "   第三个月与第一个月流入笔数差  第三个月与第一个月总笔数差  相关客户数  最后交易日流出金额  最后交易日流出笔数  最后交易日流入金额  \\\n",
       "0             0.0           -4.0    2.0      19.32        2.0       3.96   \n",
       "\n",
       "   最后交易日流入笔数  最后交易日总净流  最后交易日总金额  最后交易日总笔数  非工作日交易金额  非工作日交易笔数  企业交易绝对值最高金额  \\\n",
       "0        1.0    -15.36     23.28       3.0      69.6       4.0       106.75   \n",
       "\n",
       "   企业交易绝对值最低金额  企业交易绝对值_mean  企业交易绝对值_std  近一月平均账户余额  近一月最大账户余额  近一月账户余额方差  \\\n",
       "0         1.24     29.491176    36.946221      30.41      75.98  21.636022   \n",
       "\n",
       "   倒数第三个月平均账户余额  倒数第三个月最大账户余额  倒数第三个月账户余额方差  第三个月与第一个月余额均值差  第三个月与第一个月余额最大差  \\\n",
       "0     69.550909        106.75     26.308696      -39.140909          -30.77   \n",
       "\n",
       "   近一月交易代码个数  近一月渠道代码个数  倒数第三月交易代码个数  倒数第三月渠道代码个数  第三个月与第一个月渠道数差  \\\n",
       "0        6.0        4.0          5.0          4.0            0.0   \n",
       "\n",
       "   第三个月与第一个月交易代码数差  交易代码_count_低频交易日  渠道代码_count_低频交易日  合约账户余额_mean_低频交易日  \\\n",
       "0              1.0               7.0               7.0          86.568571   \n",
       "\n",
       "   合约账户余额_min_低频交易日  合约账户余额_max_低频交易日  合约账户余额_std_低频交易日  折人民币交易金额_count_低频交易日  \\\n",
       "0             70.63            106.75         14.981426                   7.0   \n",
       "\n",
       "   折人民币交易金额_sum_低频交易日  折人民币交易金额_max_低频交易日  折人民币交易金额_std_低频交易日  \\\n",
       "0              515.21              106.75           50.013348   \n",
       "\n",
       "   折人民币交易金额_skew_低频交易日  折人民币交易金额_count_notzero_低频交易日  \\\n",
       "0            -1.137199                           7.0   \n",
       "\n",
       "   折人民币交易金额_count_zero_低频交易日  交易对手客户编号_nunique_低频交易日  \n",
       "0                        0.0                     2.0  "
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left-join the low-frequency-day aggregates onto fncl_tr_dtal_info by customer id (客户编号).\n",
    "# fncl_tr_dtal_info is produced by an earlier cell; the recorded preview shows 客户编号 followed by\n",
    "# 78 columns from that frame and then the 14 *_低频交易日 columns.\n",
    "fncl_tr_dtal_info_all = fncl_tr_dtal_info.merge(fncl_tr_dtal_info_2, on = '客户编号', how = 'left')\n",
    "fncl_tr_dtal_info_all.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5ec39658-7cc4-4907-a1a9-a4bc460fcbfc",
   "metadata": {},
   "source": [
    "# 交易明细文本特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "1cf49376-6533-4b60-832b-6ff1aff9d630",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:36:30.563135Z",
     "iopub.status.busy": "2024-11-08T03:36:30.562862Z",
     "iopub.status.idle": "2024-11-08T03:36:30.573186Z",
     "msg_id": "31f97f5c-1e1f-48c4-8ce4-8f9b0ba804f8",
     "shell.execute_reply": "2024-11-08T03:36:30.572416Z",
     "shell.execute_reply.started": "2024-11-08T03:36:30.563109Z"
    }
   },
   "outputs": [],
   "source": [
    "def FNCL_TR_DTAL_text():\n",
    "    \"\"\"Assemble per-customer text features from the transaction-detail table.\n",
    "\n",
    "    Loads XW_ENTINFO_FNCL_TR_DTAL and XW_ENTINFO_TARGET through get_data, then\n",
    "    left-joins onto the target rows (keyed by 客户编号) the frames returned by\n",
    "    text_feats(..., num=10) and word2vec_feature(..., ext=\"B\") for the four raw\n",
    "    columns and for the six pairwise combinations of those columns.\n",
    "\n",
    "    Returns:\n",
    "        The target frame with 数据日期, is_train and FLAG dropped, plus every\n",
    "        joined feature column.\n",
    "    \"\"\"\n",
    "    key = '客户编号'\n",
    "    detail = get_data('XW_ENTINFO_FNCL_TR_DTAL', num_rows=None)\n",
    "    target = get_data('XW_ENTINFO_TARGET', num_rows=None)\n",
    "    target = target.drop(columns=['数据日期'])\n",
    "\n",
    "    def join_features(frame, cols):\n",
    "        # Every text_feats join runs before any word2vec_feature join, so both the\n",
    "        # helper call order and the final column order stay as they were.\n",
    "        for col in cols:\n",
    "            frame = frame.merge(text_feats(detail, key, col, num=10), on=key, how='left')\n",
    "        for col in cols:\n",
    "            frame = frame.merge(word2vec_feature(detail, key, col, ext=\"B\"), on=key, how='left')\n",
    "        return frame\n",
    "\n",
    "    single_cols = ['交易代码', '渠道代码', '摘要信息', '交易对手客户编号']\n",
    "    target = join_features(target, single_cols)\n",
    "\n",
    "    # Each unordered pair of raw columns, taken in list order, becomes a new column\n",
    "    # named '<left>_<right>' holding left + right element-wise (no separator is\n",
    "    # inserted between the two values).\n",
    "    pair_cols = []\n",
    "    for pos, left in enumerate(single_cols):\n",
    "        for right in single_cols[pos + 1:]:\n",
    "            combined = left + '_' + right\n",
    "            detail[combined] = detail[left] + detail[right]\n",
    "            pair_cols.append(combined)\n",
    "    target = join_features(target, pair_cols)\n",
    "\n",
    "    return target.drop(columns=['is_train', 'FLAG'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "efb3a32c-e381-41e9-9302-ce8d5143bfab",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-08T03:36:30.574330Z",
     "iopub.status.busy": "2024-11-08T03:36:30.574088Z",
     "iopub.status.idle": "2024-11-08T04:50:32.950470Z",
     "msg_id": "76c9c42d-5f3b-4ee4-84c5-9e3577592e92",
     "shell.execute_reply": "2024-11-08T04:50:32.949672Z",
     "shell.execute_reply.started": "2024-11-08T03:36:30.574305Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 281)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>交易代码_tfidf_0</th>\n",
       "      <th>交易代码_tfidf_1</th>\n",
       "      <th>交易代码_tfidf_2</th>\n",
       "      <th>交易代码_tfidf_3</th>\n",
       "      <th>交易代码_tfidf_4</th>\n",
       "      <th>交易代码_tfidf_5</th>\n",
       "      <th>交易代码_tfidf_6</th>\n",
       "      <th>交易代码_tfidf_7</th>\n",
       "      <th>交易代码_tfidf_8</th>\n",
       "      <th>交易代码_tfidf_9</th>\n",
       "      <th>交易代码_countvec_0</th>\n",
       "      <th>交易代码_countvec_1</th>\n",
       "      <th>交易代码_countvec_2</th>\n",
       "      <th>交易代码_countvec_3</th>\n",
       "      <th>交易代码_countvec_4</th>\n",
       "      <th>交易代码_countvec_5</th>\n",
       "      <th>交易代码_countvec_6</th>\n",
       "      <th>交易代码_countvec_7</th>\n",
       "      <th>交易代码_countvec_8</th>\n",
       "      <th>交易代码_countvec_9</th>\n",
       "      <th>渠道代码_tfidf_0</th>\n",
       "      <th>渠道代码_tfidf_1</th>\n",
       "      <th>渠道代码_tfidf_2</th>\n",
       "      <th>渠道代码_tfidf_3</th>\n",
       "      <th>渠道代码_tfidf_4</th>\n",
       "      <th>渠道代码_tfidf_5</th>\n",
       "      <th>渠道代码_tfidf_6</th>\n",
       "      <th>渠道代码_tfidf_7</th>\n",
       "      <th>渠道代码_tfidf_8</th>\n",
       "      <th>渠道代码_tfidf_9</th>\n",
       "      <th>渠道代码_countvec_0</th>\n",
       "      <th>渠道代码_countvec_1</th>\n",
       "      <th>渠道代码_countvec_2</th>\n",
       "      <th>渠道代码_countvec_3</th>\n",
       "      <th>渠道代码_countvec_4</th>\n",
       "      <th>渠道代码_countvec_5</th>\n",
       "      <th>渠道代码_countvec_6</th>\n",
       "      <th>渠道代码_countvec_7</th>\n",
       "      <th>渠道代码_countvec_8</th>\n",
       "      <th>渠道代码_countvec_9</th>\n",
       "      <th>摘要信息_tfidf_0</th>\n",
       "      <th>摘要信息_tfidf_1</th>\n",
       "      <th>摘要信息_tfidf_2</th>\n",
       "      <th>摘要信息_tfidf_3</th>\n",
       "      <th>摘要信息_tfidf_4</th>\n",
       "      <th>摘要信息_tfidf_5</th>\n",
       "      <th>摘要信息_tfidf_6</th>\n",
       "      <th>摘要信息_tfidf_7</th>\n",
       "      <th>摘要信息_tfidf_8</th>\n",
       "      <th>摘要信息_tfidf_9</th>\n",
       "      <th>摘要信息_countvec_0</th>\n",
       "      <th>摘要信息_countvec_1</th>\n",
       "      <th>摘要信息_countvec_2</th>\n",
       "      <th>摘要信息_countvec_3</th>\n",
       "      <th>摘要信息_countvec_4</th>\n",
       "      <th>摘要信息_countvec_5</th>\n",
       "      <th>摘要信息_countvec_6</th>\n",
       "      <th>摘要信息_countvec_7</th>\n",
       "      <th>摘要信息_countvec_8</th>\n",
       "      <th>摘要信息_countvec_9</th>\n",
       "      <th>交易对手客户编号_tfidf_0</th>\n",
       "      <th>交易对手客户编号_tfidf_1</th>\n",
       "      <th>交易对手客户编号_tfidf_2</th>\n",
       "      <th>交易对手客户编号_tfidf_3</th>\n",
       "      <th>交易对手客户编号_tfidf_4</th>\n",
       "      <th>交易对手客户编号_tfidf_5</th>\n",
       "      <th>交易对手客户编号_tfidf_6</th>\n",
       "      <th>交易对手客户编号_tfidf_7</th>\n",
       "      <th>交易对手客户编号_tfidf_8</th>\n",
       "      <th>交易对手客户编号_tfidf_9</th>\n",
       "      <th>交易对手客户编号_countvec_0</th>\n",
       "      <th>交易对手客户编号_countvec_1</th>\n",
       "      <th>交易对手客户编号_countvec_2</th>\n",
       "      <th>交易对手客户编号_countvec_3</th>\n",
       "      <th>交易对手客户编号_countvec_4</th>\n",
       "      <th>交易对手客户编号_countvec_5</th>\n",
       "      <th>交易对手客户编号_countvec_6</th>\n",
       "      <th>交易对手客户编号_countvec_7</th>\n",
       "      <th>交易对手客户编号_countvec_8</th>\n",
       "      <th>交易对手客户编号_countvec_9</th>\n",
       "      <th>客户编号_交易代码_w2v_0</th>\n",
       "      <th>客户编号_交易代码_w2v_1</th>\n",
       "      <th>客户编号_交易代码_w2v_2</th>\n",
       "      <th>客户编号_交易代码_w2v_3</th>\n",
       "      <th>客户编号_交易代码_w2v_4</th>\n",
       "      <th>客户编号_交易代码_w2v_5</th>\n",
       "      <th>客户编号_交易代码_w2v_6</th>\n",
       "      <th>客户编号_交易代码_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_w2v_7</th>\n",
       "      <th>客户编号_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_摘要信息_w2v_2</th>\n",
       "      <th>...</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_8</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_9</th>\n",
       "      <th>渠道代码_摘要信息_countvec_0</th>\n",
       "      <th>渠道代码_摘要信息_countvec_1</th>\n",
       "      <th>渠道代码_摘要信息_countvec_2</th>\n",
       "      <th>渠道代码_摘要信息_countvec_3</th>\n",
       "      <th>渠道代码_摘要信息_countvec_4</th>\n",
       "      <th>渠道代码_摘要信息_countvec_5</th>\n",
       "      <th>渠道代码_摘要信息_countvec_6</th>\n",
       "      <th>渠道代码_摘要信息_countvec_7</th>\n",
       "      <th>渠道代码_摘要信息_countvec_8</th>\n",
       "      <th>渠道代码_摘要信息_countvec_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_9</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_0</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_1</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_2</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_3</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_4</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_5</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_6</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_7</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>0.810723</td>\n",
       "      <td>-0.162497</td>\n",
       "      <td>-0.092121</td>\n",
       "      <td>-0.092624</td>\n",
       "      <td>0.218853</td>\n",
       "      <td>-0.00578</td>\n",
       "      <td>-0.053608</td>\n",
       "      <td>-0.014666</td>\n",
       "      <td>-0.077798</td>\n",
       "      <td>0.061295</td>\n",
       "      <td>2.012122</td>\n",
       "      <td>-0.831827</td>\n",
       "      <td>4.987333</td>\n",
       "      <td>-0.985892</td>\n",
       "      <td>-0.001584</td>\n",
       "      <td>1.115439</td>\n",
       "      <td>0.026855</td>\n",
       "      <td>4.423461</td>\n",
       "      <td>-1.987232</td>\n",
       "      <td>-11.330246</td>\n",
       "      <td>0.890244</td>\n",
       "      <td>-0.109296</td>\n",
       "      <td>0.329066</td>\n",
       "      <td>-0.062887</td>\n",
       "      <td>-0.016809</td>\n",
       "      <td>-0.013846</td>\n",
       "      <td>0.015724</td>\n",
       "      <td>-0.083317</td>\n",
       "      <td>-0.033072</td>\n",
       "      <td>0.001104</td>\n",
       "      <td>7.853681</td>\n",
       "      <td>0.145821</td>\n",
       "      <td>19.316191</td>\n",
       "      <td>1.516802</td>\n",
       "      <td>-3.050044</td>\n",
       "      <td>-1.780126</td>\n",
       "      <td>0.075722</td>\n",
       "      <td>-0.104005</td>\n",
       "      <td>-0.103899</td>\n",
       "      <td>-0.547517</td>\n",
       "      <td>0.544492</td>\n",
       "      <td>-0.020419</td>\n",
       "      <td>-0.058198</td>\n",
       "      <td>-0.024149</td>\n",
       "      <td>-0.005315</td>\n",
       "      <td>0.008176</td>\n",
       "      <td>-0.008688</td>\n",
       "      <td>-0.010355</td>\n",
       "      <td>-0.010614</td>\n",
       "      <td>-0.012956</td>\n",
       "      <td>23.003964</td>\n",
       "      <td>0.922264</td>\n",
       "      <td>-0.02455</td>\n",
       "      <td>-0.000378</td>\n",
       "      <td>-0.005315</td>\n",
       "      <td>0.000444</td>\n",
       "      <td>-0.001005</td>\n",
       "      <td>-0.001648</td>\n",
       "      <td>0.004577</td>\n",
       "      <td>-0.018144</td>\n",
       "      <td>0.88464</td>\n",
       "      <td>-0.209981</td>\n",
       "      <td>-0.005231</td>\n",
       "      <td>-0.000383</td>\n",
       "      <td>-0.000128</td>\n",
       "      <td>-0.000927</td>\n",
       "      <td>-0.000066</td>\n",
       "      <td>-0.000157</td>\n",
       "      <td>0.000022</td>\n",
       "      <td>-0.001215</td>\n",
       "      <td>0.233184</td>\n",
       "      <td>13.982342</td>\n",
       "      <td>20.18054</td>\n",
       "      <td>-0.997424</td>\n",
       "      <td>0.183728</td>\n",
       "      <td>-0.122282</td>\n",
       "      <td>-0.034816</td>\n",
       "      <td>-0.066169</td>\n",
       "      <td>0.013323</td>\n",
       "      <td>-0.001122</td>\n",
       "      <td>0.697141</td>\n",
       "      <td>0.077173</td>\n",
       "      <td>0.122017</td>\n",
       "      <td>0.352532</td>\n",
       "      <td>-0.393124</td>\n",
       "      <td>-0.233153</td>\n",
       "      <td>0.734571</td>\n",
       "      <td>0.529696</td>\n",
       "      <td>-0.098773</td>\n",
       "      <td>-0.369754</td>\n",
       "      <td>-0.518923</td>\n",
       "      <td>-0.583388</td>\n",
       "      <td>-0.756928</td>\n",
       "      <td>0.067343</td>\n",
       "      <td>-0.452553</td>\n",
       "      <td>0.450838</td>\n",
       "      <td>-0.209803</td>\n",
       "      <td>0.348169</td>\n",
       "      <td>2.220767</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.034319</td>\n",
       "      <td>-0.002089</td>\n",
       "      <td>1.858848</td>\n",
       "      <td>8.185632</td>\n",
       "      <td>-2.171361</td>\n",
       "      <td>-16.286702</td>\n",
       "      <td>0.072092</td>\n",
       "      <td>-0.356895</td>\n",
       "      <td>0.006055</td>\n",
       "      <td>1.709881</td>\n",
       "      <td>-0.224256</td>\n",
       "      <td>-0.140932</td>\n",
       "      <td>0.747345</td>\n",
       "      <td>0.084474</td>\n",
       "      <td>-0.118081</td>\n",
       "      <td>-0.087905</td>\n",
       "      <td>-0.029901</td>\n",
       "      <td>-0.052425</td>\n",
       "      <td>0.091839</td>\n",
       "      <td>0.220949</td>\n",
       "      <td>-0.014612</td>\n",
       "      <td>-0.045959</td>\n",
       "      <td>0.056557</td>\n",
       "      <td>1.864093</td>\n",
       "      <td>6.557424</td>\n",
       "      <td>-0.155376</td>\n",
       "      <td>1.547072</td>\n",
       "      <td>0.360551</td>\n",
       "      <td>14.312553</td>\n",
       "      <td>0.776877</td>\n",
       "      <td>-2.122171</td>\n",
       "      <td>-0.223114</td>\n",
       "      <td>0.419106</td>\n",
       "      <td>0.0166</td>\n",
       "      <td>-0.05208</td>\n",
       "      <td>-0.023534</td>\n",
       "      <td>-0.013723</td>\n",
       "      <td>-0.015781</td>\n",
       "      <td>-0.011465</td>\n",
       "      <td>0.000263</td>\n",
       "      <td>0.006329</td>\n",
       "      <td>-0.008315</td>\n",
       "      <td>0.004686</td>\n",
       "      <td>7.08161</td>\n",
       "      <td>-0.203347</td>\n",
       "      <td>15.55727</td>\n",
       "      <td>-0.29409</td>\n",
       "      <td>-0.416516</td>\n",
       "      <td>-0.169363</td>\n",
       "      <td>-0.083976</td>\n",
       "      <td>-0.020281</td>\n",
       "      <td>-0.315465</td>\n",
       "      <td>1.216363</td>\n",
       "      <td>0.387643</td>\n",
       "      <td>0.041235</td>\n",
       "      <td>0.122863</td>\n",
       "      <td>0.089642</td>\n",
       "      <td>0.360907</td>\n",
       "      <td>0.173583</td>\n",
       "      <td>-0.064026</td>\n",
       "      <td>-1.275527</td>\n",
       "      <td>-2.232501</td>\n",
       "      <td>-1.986748</td>\n",
       "      <td>0.74615</td>\n",
       "      <td>1.797314</td>\n",
       "      <td>0.352396</td>\n",
       "      <td>-1.015768</td>\n",
       "      <td>0.462867</td>\n",
       "      <td>0.566154</td>\n",
       "      <td>-0.368129</td>\n",
       "      <td>-0.174086</td>\n",
       "      <td>-1.61998</td>\n",
       "      <td>-0.334377</td>\n",
       "      <td>1.329901</td>\n",
       "      <td>-1.552544</td>\n",
       "      <td>-0.683254</td>\n",
       "      <td>0.606885</td>\n",
       "      <td>-0.58038</td>\n",
       "      <td>0.4367</td>\n",
       "      <td>-1.445817</td>\n",
       "      <td>-0.862692</td>\n",
       "      <td>-0.516107</td>\n",
       "      <td>-3.854006</td>\n",
       "      <td>1.488673</td>\n",
       "      <td>0.283305</td>\n",
       "      <td>-0.758342</td>\n",
       "      <td>-1.447038</td>\n",
       "      <td>2.477799</td>\n",
       "      <td>-0.113421</td>\n",
       "      <td>0.460692</td>\n",
       "      <td>-1.110022</td>\n",
       "      <td>-0.269635</td>\n",
       "      <td>0.127737</td>\n",
       "      <td>-1.632896</td>\n",
       "      <td>2.295154</td>\n",
       "      <td>0.965088</td>\n",
       "      <td>-0.534077</td>\n",
       "      <td>2.403713</td>\n",
       "      <td>-0.769188</td>\n",
       "      <td>2.437117</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 281 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号  交易代码_tfidf_0  交易代码_tfidf_1  交易代码_tfidf_2  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9      0.810723     -0.162497     -0.092121   \n",
       "\n",
       "   交易代码_tfidf_3  交易代码_tfidf_4  交易代码_tfidf_5  交易代码_tfidf_6  交易代码_tfidf_7  \\\n",
       "0     -0.092624      0.218853      -0.00578     -0.053608     -0.014666   \n",
       "\n",
       "   交易代码_tfidf_8  交易代码_tfidf_9  交易代码_countvec_0  交易代码_countvec_1  \\\n",
       "0     -0.077798      0.061295         2.012122        -0.831827   \n",
       "\n",
       "   交易代码_countvec_2  交易代码_countvec_3  交易代码_countvec_4  交易代码_countvec_5  \\\n",
       "0         4.987333        -0.985892        -0.001584         1.115439   \n",
       "\n",
       "   交易代码_countvec_6  交易代码_countvec_7  交易代码_countvec_8  交易代码_countvec_9  \\\n",
       "0         0.026855         4.423461        -1.987232       -11.330246   \n",
       "\n",
       "   渠道代码_tfidf_0  渠道代码_tfidf_1  渠道代码_tfidf_2  渠道代码_tfidf_3  渠道代码_tfidf_4  \\\n",
       "0      0.890244     -0.109296      0.329066     -0.062887     -0.016809   \n",
       "\n",
       "   渠道代码_tfidf_5  渠道代码_tfidf_6  渠道代码_tfidf_7  渠道代码_tfidf_8  渠道代码_tfidf_9  \\\n",
       "0     -0.013846      0.015724     -0.083317     -0.033072      0.001104   \n",
       "\n",
       "   渠道代码_countvec_0  渠道代码_countvec_1  渠道代码_countvec_2  渠道代码_countvec_3  \\\n",
       "0         7.853681         0.145821        19.316191         1.516802   \n",
       "\n",
       "   渠道代码_countvec_4  渠道代码_countvec_5  渠道代码_countvec_6  渠道代码_countvec_7  \\\n",
       "0        -3.050044        -1.780126         0.075722        -0.104005   \n",
       "\n",
       "   渠道代码_countvec_8  渠道代码_countvec_9  摘要信息_tfidf_0  摘要信息_tfidf_1  摘要信息_tfidf_2  \\\n",
       "0        -0.103899        -0.547517      0.544492     -0.020419     -0.058198   \n",
       "\n",
       "   摘要信息_tfidf_3  摘要信息_tfidf_4  摘要信息_tfidf_5  摘要信息_tfidf_6  摘要信息_tfidf_7  \\\n",
       "0     -0.024149     -0.005315      0.008176     -0.008688     -0.010355   \n",
       "\n",
       "   摘要信息_tfidf_8  摘要信息_tfidf_9  摘要信息_countvec_0  摘要信息_countvec_1  \\\n",
       "0     -0.010614     -0.012956        23.003964         0.922264   \n",
       "\n",
       "   摘要信息_countvec_2  摘要信息_countvec_3  摘要信息_countvec_4  摘要信息_countvec_5  \\\n",
       "0         -0.02455        -0.000378        -0.005315         0.000444   \n",
       "\n",
       "   摘要信息_countvec_6  摘要信息_countvec_7  摘要信息_countvec_8  摘要信息_countvec_9  \\\n",
       "0        -0.001005        -0.001648         0.004577        -0.018144   \n",
       "\n",
       "   交易对手客户编号_tfidf_0  交易对手客户编号_tfidf_1  交易对手客户编号_tfidf_2  交易对手客户编号_tfidf_3  \\\n",
       "0           0.88464         -0.209981         -0.005231         -0.000383   \n",
       "\n",
       "   交易对手客户编号_tfidf_4  交易对手客户编号_tfidf_5  交易对手客户编号_tfidf_6  交易对手客户编号_tfidf_7  \\\n",
       "0         -0.000128         -0.000927         -0.000066         -0.000157   \n",
       "\n",
       "   交易对手客户编号_tfidf_8  交易对手客户编号_tfidf_9  交易对手客户编号_countvec_0  \\\n",
       "0          0.000022         -0.001215             0.233184   \n",
       "\n",
       "   交易对手客户编号_countvec_1  交易对手客户编号_countvec_2  交易对手客户编号_countvec_3  \\\n",
       "0            13.982342             20.18054            -0.997424   \n",
       "\n",
       "   交易对手客户编号_countvec_4  交易对手客户编号_countvec_5  交易对手客户编号_countvec_6  \\\n",
       "0             0.183728            -0.122282            -0.034816   \n",
       "\n",
       "   交易对手客户编号_countvec_7  交易对手客户编号_countvec_8  交易对手客户编号_countvec_9  \\\n",
       "0            -0.066169             0.013323            -0.001122   \n",
       "\n",
       "   客户编号_交易代码_w2v_0  客户编号_交易代码_w2v_1  客户编号_交易代码_w2v_2  客户编号_交易代码_w2v_3  \\\n",
       "0         0.697141         0.077173         0.122017         0.352532   \n",
       "\n",
       "   客户编号_交易代码_w2v_4  客户编号_交易代码_w2v_5  客户编号_交易代码_w2v_6  客户编号_交易代码_w2v_7  \\\n",
       "0        -0.393124        -0.233153         0.734571         0.529696   \n",
       "\n",
       "   客户编号_渠道代码_w2v_0  客户编号_渠道代码_w2v_1  客户编号_渠道代码_w2v_2  客户编号_渠道代码_w2v_3  \\\n",
       "0        -0.098773        -0.369754        -0.518923        -0.583388   \n",
       "\n",
       "   客户编号_渠道代码_w2v_4  客户编号_渠道代码_w2v_5  客户编号_渠道代码_w2v_6  客户编号_渠道代码_w2v_7  \\\n",
       "0        -0.756928         0.067343        -0.452553         0.450838   \n",
       "\n",
       "   客户编号_摘要信息_w2v_0  客户编号_摘要信息_w2v_1  客户编号_摘要信息_w2v_2  ...  渠道代码_摘要信息_tfidf_8  \\\n",
       "0        -0.209803         0.348169         2.220767  ...          -0.034319   \n",
       "\n",
       "   渠道代码_摘要信息_tfidf_9  渠道代码_摘要信息_countvec_0  渠道代码_摘要信息_countvec_1  \\\n",
       "0          -0.002089              1.858848              8.185632   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_2  渠道代码_摘要信息_countvec_3  渠道代码_摘要信息_countvec_4  \\\n",
       "0             -2.171361            -16.286702              0.072092   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_5  渠道代码_摘要信息_countvec_6  渠道代码_摘要信息_countvec_7  \\\n",
       "0             -0.356895              0.006055              1.709881   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_8  渠道代码_摘要信息_countvec_9  渠道代码_交易对手客户编号_tfidf_0  \\\n",
       "0             -0.224256             -0.140932               0.747345   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_1  渠道代码_交易对手客户编号_tfidf_2  渠道代码_交易对手客户编号_tfidf_3  \\\n",
       "0               0.084474              -0.118081              -0.087905   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_4  渠道代码_交易对手客户编号_tfidf_5  渠道代码_交易对手客户编号_tfidf_6  \\\n",
       "0              -0.029901              -0.052425               0.091839   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_7  渠道代码_交易对手客户编号_tfidf_8  渠道代码_交易对手客户编号_tfidf_9  \\\n",
       "0               0.220949              -0.014612              -0.045959   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_0  渠道代码_交易对手客户编号_countvec_1  \\\n",
       "0                  0.056557                  1.864093   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_2  渠道代码_交易对手客户编号_countvec_3  \\\n",
       "0                  6.557424                 -0.155376   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_4  渠道代码_交易对手客户编号_countvec_5  \\\n",
       "0                  1.547072                  0.360551   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_6  渠道代码_交易对手客户编号_countvec_7  \\\n",
       "0                 14.312553                  0.776877   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_8  渠道代码_交易对手客户编号_countvec_9  摘要信息_交易对手客户编号_tfidf_0  \\\n",
       "0                 -2.122171                 -0.223114               0.419106   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_1  摘要信息_交易对手客户编号_tfidf_2  摘要信息_交易对手客户编号_tfidf_3  \\\n",
       "0                 0.0166               -0.05208              -0.023534   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_4  摘要信息_交易对手客户编号_tfidf_5  摘要信息_交易对手客户编号_tfidf_6  \\\n",
       "0              -0.013723              -0.015781              -0.011465   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_7  摘要信息_交易对手客户编号_tfidf_8  摘要信息_交易对手客户编号_tfidf_9  \\\n",
       "0               0.000263               0.006329              -0.008315   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_0  摘要信息_交易对手客户编号_countvec_1  \\\n",
       "0                  0.004686                   7.08161   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_2  摘要信息_交易对手客户编号_countvec_3  \\\n",
       "0                 -0.203347                  15.55727   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_4  摘要信息_交易对手客户编号_countvec_5  \\\n",
       "0                  -0.29409                 -0.416516   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_6  摘要信息_交易对手客户编号_countvec_7  \\\n",
       "0                 -0.169363                 -0.083976   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_8  摘要信息_交易对手客户编号_countvec_9  客户编号_交易代码_渠道代码_w2v_0  \\\n",
       "0                 -0.020281                 -0.315465              1.216363   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_1  客户编号_交易代码_渠道代码_w2v_2  客户编号_交易代码_渠道代码_w2v_3  \\\n",
       "0              0.387643              0.041235              0.122863   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_4  客户编号_交易代码_渠道代码_w2v_5  客户编号_交易代码_渠道代码_w2v_6  \\\n",
       "0              0.089642              0.360907              0.173583   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_7  客户编号_交易代码_摘要信息_w2v_0  客户编号_交易代码_摘要信息_w2v_1  \\\n",
       "0             -0.064026             -1.275527             -2.232501   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_2  客户编号_交易代码_摘要信息_w2v_3  客户编号_交易代码_摘要信息_w2v_4  \\\n",
       "0             -1.986748               0.74615              1.797314   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_5  客户编号_交易代码_摘要信息_w2v_6  客户编号_交易代码_摘要信息_w2v_7  \\\n",
       "0              0.352396             -1.015768              0.462867   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_0  客户编号_交易代码_交易对手客户编号_w2v_1  \\\n",
       "0                  0.566154                 -0.368129   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_2  客户编号_交易代码_交易对手客户编号_w2v_3  \\\n",
       "0                 -0.174086                  -1.61998   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_4  客户编号_交易代码_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.334377                  1.329901   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_6  客户编号_交易代码_交易对手客户编号_w2v_7  客户编号_渠道代码_摘要信息_w2v_0  \\\n",
       "0                 -1.552544                 -0.683254              0.606885   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_1  客户编号_渠道代码_摘要信息_w2v_2  客户编号_渠道代码_摘要信息_w2v_3  \\\n",
       "0              -0.58038                0.4367             -1.445817   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_4  客户编号_渠道代码_摘要信息_w2v_5  客户编号_渠道代码_摘要信息_w2v_6  \\\n",
       "0             -0.862692             -0.516107             -3.854006   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_7  客户编号_渠道代码_交易对手客户编号_w2v_0  客户编号_渠道代码_交易对手客户编号_w2v_1  \\\n",
       "0              1.488673                  0.283305                 -0.758342   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_2  客户编号_渠道代码_交易对手客户编号_w2v_3  \\\n",
       "0                 -1.447038                  2.477799   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_4  客户编号_渠道代码_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.113421                  0.460692   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_6  客户编号_渠道代码_交易对手客户编号_w2v_7  \\\n",
       "0                 -1.110022                 -0.269635   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_0  客户编号_摘要信息_交易对手客户编号_w2v_1  \\\n",
       "0                  0.127737                 -1.632896   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_2  客户编号_摘要信息_交易对手客户编号_w2v_3  \\\n",
       "0                  2.295154                  0.965088   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_4  客户编号_摘要信息_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.534077                  2.403713   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_6  客户编号_摘要信息_交易对手客户编号_w2v_7  \n",
       "0                 -0.769188                  2.437117  \n",
       "\n",
       "[1 rows x 281 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the feature table by calling FNCL_TR_DTAL_text(), which is defined outside this cell.\n",
    "# Per the saved output, the result has a 客户编号 (customer ID) column followed by\n",
    "# tfidf / countvec / w2v feature columns.\n",
    "# NOTE(review): presumably consumed by the feature-table aggregation section that follows -- confirm.\n",
    "fncl_tr_dtal_text = FNCL_TR_DTAL_text()\n",
    "# Sanity check: print the (rows, columns) shape, then display only the first row.\n",
    "print(fncl_tr_dtal_text.shape)\n",
    "fncl_tr_dtal_text.head(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "106c9599-1a27-4ccc-b435-18df061530f1",
   "metadata": {},
   "source": [
    "# 四张特征表聚合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "e36e791e-a214-4102-8835-1a7e67828ed3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:18:43.123353Z",
     "iopub.status.busy": "2024-11-11T03:18:43.122873Z",
     "iopub.status.idle": "2024-11-11T03:18:44.446826Z",
     "msg_id": "7b28fe60-ae50-4d91-803a-5b1ede59d146",
     "shell.execute_reply": "2024-11-11T03:18:44.446052Z",
     "shell.execute_reply.started": "2024-11-11T03:18:43.123321Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 607)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>是否长期经营</th>\n",
       "      <th>经营成立时间是否相等</th>\n",
       "      <th>注册资金过小</th>\n",
       "      <th>经营是否已过期</th>\n",
       "      <th>剩余经营天数_天</th>\n",
       "      <th>已经营天数_天</th>\n",
       "      <th>当期经营期限总天数_天</th>\n",
       "      <th>自成立经营期限总天数_天</th>\n",
       "      <th>已成立天数_天</th>\n",
       "      <th>再次经营_天</th>\n",
       "      <th>剩余经营天数_月</th>\n",
       "      <th>已经营天数_月</th>\n",
       "      <th>当期经营期限总天数_月</th>\n",
       "      <th>自成立经营期限总天数_月</th>\n",
       "      <th>已成立天数_月</th>\n",
       "      <th>再次经营_月</th>\n",
       "      <th>剩余经营天数_年</th>\n",
       "      <th>已经营天数_年</th>\n",
       "      <th>当期经营期限总天数_年</th>\n",
       "      <th>自成立经营期限总天数_年</th>\n",
       "      <th>已成立天数_年</th>\n",
       "      <th>再次经营_年</th>\n",
       "      <th>经营状态_编码</th>\n",
       "      <th>企业（机构）类型编码_频数是否前10</th>\n",
       "      <th>企业（机构）类型编码_频数是否后20</th>\n",
       "      <th>企业（机构）类型编码_是否频数最高2类</th>\n",
       "      <th>所在省份编码_频数是否前5</th>\n",
       "      <th>所在省份编码_频数是否后5</th>\n",
       "      <th>企业（机构）类型编码_是否坏率最高2类</th>\n",
       "      <th>国民经济行业代码_频数是否前5</th>\n",
       "      <th>国民经济行业代码_频数是否后230</th>\n",
       "      <th>企业（机构）类型编码_分箱</th>\n",
       "      <th>所在省份编码_分箱</th>\n",
       "      <th>国民经济行业代码_分箱</th>\n",
       "      <th>法定代表人相关企业个数</th>\n",
       "      <th>法人涉足企业类型</th>\n",
       "      <th>法人涉足国民经济行业代码</th>\n",
       "      <th>法人跨省个数</th>\n",
       "      <th>法定代表人_tfidf_0</th>\n",
       "      <th>法定代表人_tfidf_1</th>\n",
       "      <th>法定代表人_tfidf_2</th>\n",
       "      <th>法定代表人_tfidf_3</th>\n",
       "      <th>法定代表人_tfidf_4</th>\n",
       "      <th>法定代表人_tfidf_5</th>\n",
       "      <th>法定代表人_tfidf_6</th>\n",
       "      <th>法定代表人_tfidf_7</th>\n",
       "      <th>法定代表人_tfidf_8</th>\n",
       "      <th>法定代表人_tfidf_9</th>\n",
       "      <th>法定代表人_countvec_0</th>\n",
       "      <th>法定代表人_countvec_1</th>\n",
       "      <th>法定代表人_countvec_2</th>\n",
       "      <th>法定代表人_countvec_3</th>\n",
       "      <th>法定代表人_countvec_4</th>\n",
       "      <th>法定代表人_countvec_5</th>\n",
       "      <th>法定代表人_countvec_6</th>\n",
       "      <th>法定代表人_countvec_7</th>\n",
       "      <th>法定代表人_countvec_8</th>\n",
       "      <th>法定代表人_countvec_9</th>\n",
       "      <th>企业（机构）类型编码_tfidf_0</th>\n",
       "      <th>企业（机构）类型编码_tfidf_1</th>\n",
       "      <th>企业（机构）类型编码_tfidf_2</th>\n",
       "      <th>企业（机构）类型编码_tfidf_3</th>\n",
       "      <th>企业（机构）类型编码_tfidf_4</th>\n",
       "      <th>企业（机构）类型编码_tfidf_5</th>\n",
       "      <th>企业（机构）类型编码_tfidf_6</th>\n",
       "      <th>企业（机构）类型编码_tfidf_7</th>\n",
       "      <th>企业（机构）类型编码_tfidf_8</th>\n",
       "      <th>企业（机构）类型编码_tfidf_9</th>\n",
       "      <th>企业（机构）类型编码_countvec_0</th>\n",
       "      <th>企业（机构）类型编码_countvec_1</th>\n",
       "      <th>企业（机构）类型编码_countvec_2</th>\n",
       "      <th>企业（机构）类型编码_countvec_3</th>\n",
       "      <th>企业（机构）类型编码_countvec_4</th>\n",
       "      <th>企业（机构）类型编码_countvec_5</th>\n",
       "      <th>企业（机构）类型编码_countvec_6</th>\n",
       "      <th>企业（机构）类型编码_countvec_7</th>\n",
       "      <th>企业（机构）类型编码_countvec_8</th>\n",
       "      <th>企业（机构）类型编码_countvec_9</th>\n",
       "      <th>所在省份编码_tfidf_0</th>\n",
       "      <th>所在省份编码_tfidf_1</th>\n",
       "      <th>所在省份编码_tfidf_2</th>\n",
       "      <th>所在省份编码_tfidf_3</th>\n",
       "      <th>所在省份编码_tfidf_4</th>\n",
       "      <th>所在省份编码_tfidf_5</th>\n",
       "      <th>所在省份编码_tfidf_6</th>\n",
       "      <th>所在省份编码_tfidf_7</th>\n",
       "      <th>所在省份编码_tfidf_8</th>\n",
       "      <th>所在省份编码_tfidf_9</th>\n",
       "      <th>所在省份编码_countvec_0</th>\n",
       "      <th>所在省份编码_countvec_1</th>\n",
       "      <th>所在省份编码_countvec_2</th>\n",
       "      <th>所在省份编码_countvec_3</th>\n",
       "      <th>所在省份编码_countvec_4</th>\n",
       "      <th>所在省份编码_countvec_5</th>\n",
       "      <th>所在省份编码_countvec_6</th>\n",
       "      <th>所在省份编码_countvec_7</th>\n",
       "      <th>所在省份编码_countvec_8</th>\n",
       "      <th>所在省份编码_countvec_9</th>\n",
       "      <th>...</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_8</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_9</th>\n",
       "      <th>渠道代码_摘要信息_countvec_0</th>\n",
       "      <th>渠道代码_摘要信息_countvec_1</th>\n",
       "      <th>渠道代码_摘要信息_countvec_2</th>\n",
       "      <th>渠道代码_摘要信息_countvec_3</th>\n",
       "      <th>渠道代码_摘要信息_countvec_4</th>\n",
       "      <th>渠道代码_摘要信息_countvec_5</th>\n",
       "      <th>渠道代码_摘要信息_countvec_6</th>\n",
       "      <th>渠道代码_摘要信息_countvec_7</th>\n",
       "      <th>渠道代码_摘要信息_countvec_8</th>\n",
       "      <th>渠道代码_摘要信息_countvec_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_9</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_0</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_1</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_2</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_3</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_4</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_5</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_6</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_7</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>182d6a854532dd26a1b111e77bd501f4</td>\n",
       "      <td>690521.61</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>14574</td>\n",
       "      <td>3675</td>\n",
       "      <td>18249</td>\n",
       "      <td>18249</td>\n",
       "      <td>3675</td>\n",
       "      <td>0</td>\n",
       "      <td>480</td>\n",
       "      <td>120</td>\n",
       "      <td>600</td>\n",
       "      <td>600</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>40.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-5.836406e-33</td>\n",
       "      <td>-1.440541e-32</td>\n",
       "      <td>6.185603e-31</td>\n",
       "      <td>-3.699480e-30</td>\n",
       "      <td>1.660255e-30</td>\n",
       "      <td>-2.937689e-30</td>\n",
       "      <td>3.197309e-30</td>\n",
       "      <td>-1.102143e-30</td>\n",
       "      <td>-2.709408e-30</td>\n",
       "      <td>1.291601e-29</td>\n",
       "      <td>-5.836406e-33</td>\n",
       "      <td>-1.440541e-32</td>\n",
       "      <td>6.185603e-31</td>\n",
       "      <td>-3.699480e-30</td>\n",
       "      <td>1.660255e-30</td>\n",
       "      <td>-2.937689e-30</td>\n",
       "      <td>3.197309e-30</td>\n",
       "      <td>-1.102143e-30</td>\n",
       "      <td>-2.709408e-30</td>\n",
       "      <td>1.291601e-29</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>-7.574629e-18</td>\n",
       "      <td>-4.433243e-18</td>\n",
       "      <td>-2.362524e-19</td>\n",
       "      <td>1.495586e-18</td>\n",
       "      <td>4.368686e-16</td>\n",
       "      <td>4.767665e-16</td>\n",
       "      <td>-8.066189e-15</td>\n",
       "      <td>7.180238e-13</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-3.755863e-14</td>\n",
       "      <td>-7.574629e-18</td>\n",
       "      <td>-4.433243e-18</td>\n",
       "      <td>-2.362524e-19</td>\n",
       "      <td>1.495586e-18</td>\n",
       "      <td>4.368686e-16</td>\n",
       "      <td>4.767665e-16</td>\n",
       "      <td>-8.066189e-15</td>\n",
       "      <td>7.180238e-13</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-3.755863e-14</td>\n",
       "      <td>...</td>\n",
       "      <td>0.032039</td>\n",
       "      <td>-0.102822</td>\n",
       "      <td>15.544198</td>\n",
       "      <td>174.289215</td>\n",
       "      <td>-44.023341</td>\n",
       "      <td>-57.271956</td>\n",
       "      <td>2.141641</td>\n",
       "      <td>-2.568231</td>\n",
       "      <td>-2.022687</td>\n",
       "      <td>-91.639821</td>\n",
       "      <td>31.556433</td>\n",
       "      <td>-2.291102</td>\n",
       "      <td>0.373648</td>\n",
       "      <td>0.547778</td>\n",
       "      <td>-0.160473</td>\n",
       "      <td>-0.027813</td>\n",
       "      <td>-0.008258</td>\n",
       "      <td>-0.007374</td>\n",
       "      <td>-0.041158</td>\n",
       "      <td>-0.012754</td>\n",
       "      <td>0.036373</td>\n",
       "      <td>0.167673</td>\n",
       "      <td>1.199408</td>\n",
       "      <td>55.197004</td>\n",
       "      <td>236.809714</td>\n",
       "      <td>-7.440004</td>\n",
       "      <td>0.886514</td>\n",
       "      <td>-32.8267</td>\n",
       "      <td>70.314523</td>\n",
       "      <td>13.949886</td>\n",
       "      <td>-4.547821</td>\n",
       "      <td>27.195111</td>\n",
       "      <td>0.243561</td>\n",
       "      <td>0.505371</td>\n",
       "      <td>-0.028903</td>\n",
       "      <td>-0.073656</td>\n",
       "      <td>0.017654</td>\n",
       "      <td>-0.007088</td>\n",
       "      <td>-0.011645</td>\n",
       "      <td>0.239337</td>\n",
       "      <td>-0.085407</td>\n",
       "      <td>-0.022291</td>\n",
       "      <td>0.033969</td>\n",
       "      <td>157.808317</td>\n",
       "      <td>-6.15358</td>\n",
       "      <td>51.319125</td>\n",
       "      <td>-1.470159</td>\n",
       "      <td>-29.441985</td>\n",
       "      <td>21.866637</td>\n",
       "      <td>-0.628486</td>\n",
       "      <td>0.539294</td>\n",
       "      <td>19.930739</td>\n",
       "      <td>0.929519</td>\n",
       "      <td>0.041919</td>\n",
       "      <td>-0.318774</td>\n",
       "      <td>-0.035561</td>\n",
       "      <td>0.227936</td>\n",
       "      <td>0.420671</td>\n",
       "      <td>0.235241</td>\n",
       "      <td>-0.116549</td>\n",
       "      <td>-0.704036</td>\n",
       "      <td>-1.022647</td>\n",
       "      <td>-1.33967</td>\n",
       "      <td>0.55187</td>\n",
       "      <td>2.395266</td>\n",
       "      <td>-0.682805</td>\n",
       "      <td>0.417848</td>\n",
       "      <td>-0.431105</td>\n",
       "      <td>1.080258</td>\n",
       "      <td>0.155055</td>\n",
       "      <td>-0.110294</td>\n",
       "      <td>-1.295262</td>\n",
       "      <td>0.070553</td>\n",
       "      <td>1.742599</td>\n",
       "      <td>-1.454264</td>\n",
       "      <td>-0.450553</td>\n",
       "      <td>0.03435</td>\n",
       "      <td>-1.680509</td>\n",
       "      <td>-0.781761</td>\n",
       "      <td>-0.693793</td>\n",
       "      <td>-0.242717</td>\n",
       "      <td>0.461331</td>\n",
       "      <td>-2.490363</td>\n",
       "      <td>1.221877</td>\n",
       "      <td>0.030599</td>\n",
       "      <td>-0.467518</td>\n",
       "      <td>-1.226284</td>\n",
       "      <td>2.05362</td>\n",
       "      <td>0.080761</td>\n",
       "      <td>0.92395</td>\n",
       "      <td>-1.016611</td>\n",
       "      <td>-0.644857</td>\n",
       "      <td>0.364926</td>\n",
       "      <td>-2.130036</td>\n",
       "      <td>1.586251</td>\n",
       "      <td>0.282562</td>\n",
       "      <td>-0.388937</td>\n",
       "      <td>1.244417</td>\n",
       "      <td>0.011919</td>\n",
       "      <td>1.579562</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1 rows × 607 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号       注册资本  是否长期经营  经营成立时间是否相等  注册资金过小  \\\n",
       "0  182d6a854532dd26a1b111e77bd501f4  690521.61       0           1       0   \n",
       "\n",
       "   经营是否已过期  剩余经营天数_天  已经营天数_天  当期经营期限总天数_天  自成立经营期限总天数_天  已成立天数_天  再次经营_天  \\\n",
       "0        0     14574     3675        18249         18249     3675       0   \n",
       "\n",
       "   剩余经营天数_月  已经营天数_月  当期经营期限总天数_月  自成立经营期限总天数_月  已成立天数_月  再次经营_月  剩余经营天数_年  \\\n",
       "0       480      120          600           600      120       0      40.0   \n",
       "\n",
       "   已经营天数_年  当期经营期限总天数_年  自成立经营期限总天数_年  已成立天数_年  再次经营_年  经营状态_编码  \\\n",
       "0     10.0         50.0          50.0     10.0     0.0        1   \n",
       "\n",
       "   企业（机构）类型编码_频数是否前10  企业（机构）类型编码_频数是否后20  企业（机构）类型编码_是否频数最高2类  所在省份编码_频数是否前5  \\\n",
       "0                   1                   0                    1              0   \n",
       "\n",
       "   所在省份编码_频数是否后5  企业（机构）类型编码_是否坏率最高2类  国民经济行业代码_频数是否前5  国民经济行业代码_频数是否后230  \\\n",
       "0              0                    0                0                  0   \n",
       "\n",
       "   企业（机构）类型编码_分箱  所在省份编码_分箱  国民经济行业代码_分箱  法定代表人相关企业个数  法人涉足企业类型  法人涉足国民经济行业代码  \\\n",
       "0              1          3            2            1         1             1   \n",
       "\n",
       "   法人跨省个数  法定代表人_tfidf_0  法定代表人_tfidf_1  法定代表人_tfidf_2  法定代表人_tfidf_3  \\\n",
       "0       1  -5.836406e-33  -1.440541e-32   6.185603e-31  -3.699480e-30   \n",
       "\n",
       "   法定代表人_tfidf_4  法定代表人_tfidf_5  法定代表人_tfidf_6  法定代表人_tfidf_7  法定代表人_tfidf_8  \\\n",
       "0   1.660255e-30  -2.937689e-30   3.197309e-30  -1.102143e-30  -2.709408e-30   \n",
       "\n",
       "   法定代表人_tfidf_9  法定代表人_countvec_0  法定代表人_countvec_1  法定代表人_countvec_2  \\\n",
       "0   1.291601e-29     -5.836406e-33     -1.440541e-32      6.185603e-31   \n",
       "\n",
       "   法定代表人_countvec_3  法定代表人_countvec_4  法定代表人_countvec_5  法定代表人_countvec_6  \\\n",
       "0     -3.699480e-30      1.660255e-30     -2.937689e-30      3.197309e-30   \n",
       "\n",
       "   法定代表人_countvec_7  法定代表人_countvec_8  法定代表人_countvec_9  企业（机构）类型编码_tfidf_0  \\\n",
       "0     -1.102143e-30     -2.709408e-30      1.291601e-29                 1.0   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_1  企业（机构）类型编码_tfidf_2  企业（机构）类型编码_tfidf_3  \\\n",
       "0       -2.426068e-17       -1.053226e-21       -1.867483e-28   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_4  企业（机构）类型编码_tfidf_5  企业（机构）类型编码_tfidf_6  \\\n",
       "0        9.566782e-33        1.061066e-34        6.700930e-34   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_7  企业（机构）类型编码_tfidf_8  企业（机构）类型编码_tfidf_9  \\\n",
       "0        1.710763e-32        9.444267e-36       -9.671025e-36   \n",
       "\n",
       "   企业（机构）类型编码_countvec_0  企业（机构）类型编码_countvec_1  企业（机构）类型编码_countvec_2  \\\n",
       "0                    1.0          -2.426068e-17          -1.053226e-21   \n",
       "\n",
       "   企业（机构）类型编码_countvec_3  企业（机构）类型编码_countvec_4  企业（机构）类型编码_countvec_5  \\\n",
       "0          -1.867483e-28           9.566782e-33           1.061066e-34   \n",
       "\n",
       "   企业（机构）类型编码_countvec_6  企业（机构）类型编码_countvec_7  企业（机构）类型编码_countvec_8  \\\n",
       "0           6.700930e-34           1.710763e-32           9.444267e-36   \n",
       "\n",
       "   企业（机构）类型编码_countvec_9  所在省份编码_tfidf_0  所在省份编码_tfidf_1  所在省份编码_tfidf_2  \\\n",
       "0          -9.671025e-36   -7.574629e-18   -4.433243e-18   -2.362524e-19   \n",
       "\n",
       "   所在省份编码_tfidf_3  所在省份编码_tfidf_4  所在省份编码_tfidf_5  所在省份编码_tfidf_6  \\\n",
       "0    1.495586e-18    4.368686e-16    4.767665e-16   -8.066189e-15   \n",
       "\n",
       "   所在省份编码_tfidf_7  所在省份编码_tfidf_8  所在省份编码_tfidf_9  所在省份编码_countvec_0  \\\n",
       "0    7.180238e-13             1.0   -3.755863e-14      -7.574629e-18   \n",
       "\n",
       "   所在省份编码_countvec_1  所在省份编码_countvec_2  所在省份编码_countvec_3  所在省份编码_countvec_4  \\\n",
       "0      -4.433243e-18      -2.362524e-19       1.495586e-18       4.368686e-16   \n",
       "\n",
       "   所在省份编码_countvec_5  所在省份编码_countvec_6  所在省份编码_countvec_7  所在省份编码_countvec_8  \\\n",
       "0       4.767665e-16      -8.066189e-15       7.180238e-13                1.0   \n",
       "\n",
       "   所在省份编码_countvec_9  ...  渠道代码_摘要信息_tfidf_8  渠道代码_摘要信息_tfidf_9  \\\n",
       "0      -3.755863e-14  ...           0.032039          -0.102822   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_0  渠道代码_摘要信息_countvec_1  渠道代码_摘要信息_countvec_2  \\\n",
       "0             15.544198            174.289215            -44.023341   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_3  渠道代码_摘要信息_countvec_4  渠道代码_摘要信息_countvec_5  \\\n",
       "0            -57.271956              2.141641             -2.568231   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_6  渠道代码_摘要信息_countvec_7  渠道代码_摘要信息_countvec_8  \\\n",
       "0             -2.022687            -91.639821             31.556433   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_9  渠道代码_交易对手客户编号_tfidf_0  渠道代码_交易对手客户编号_tfidf_1  \\\n",
       "0             -2.291102               0.373648               0.547778   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_2  渠道代码_交易对手客户编号_tfidf_3  渠道代码_交易对手客户编号_tfidf_4  \\\n",
       "0              -0.160473              -0.027813              -0.008258   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_5  渠道代码_交易对手客户编号_tfidf_6  渠道代码_交易对手客户编号_tfidf_7  \\\n",
       "0              -0.007374              -0.041158              -0.012754   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_8  渠道代码_交易对手客户编号_tfidf_9  渠道代码_交易对手客户编号_countvec_0  \\\n",
       "0               0.036373               0.167673                  1.199408   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_1  渠道代码_交易对手客户编号_countvec_2  \\\n",
       "0                 55.197004                236.809714   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_3  渠道代码_交易对手客户编号_countvec_4  \\\n",
       "0                 -7.440004                  0.886514   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_5  渠道代码_交易对手客户编号_countvec_6  \\\n",
       "0                  -32.8267                 70.314523   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_7  渠道代码_交易对手客户编号_countvec_8  \\\n",
       "0                 13.949886                 -4.547821   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_9  摘要信息_交易对手客户编号_tfidf_0  摘要信息_交易对手客户编号_tfidf_1  \\\n",
       "0                 27.195111               0.243561               0.505371   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_2  摘要信息_交易对手客户编号_tfidf_3  摘要信息_交易对手客户编号_tfidf_4  \\\n",
       "0              -0.028903              -0.073656               0.017654   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_5  摘要信息_交易对手客户编号_tfidf_6  摘要信息_交易对手客户编号_tfidf_7  \\\n",
       "0              -0.007088              -0.011645               0.239337   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_8  摘要信息_交易对手客户编号_tfidf_9  摘要信息_交易对手客户编号_countvec_0  \\\n",
       "0              -0.085407              -0.022291                  0.033969   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_1  摘要信息_交易对手客户编号_countvec_2  \\\n",
       "0                157.808317                  -6.15358   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_3  摘要信息_交易对手客户编号_countvec_4  \\\n",
       "0                 51.319125                 -1.470159   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_5  摘要信息_交易对手客户编号_countvec_6  \\\n",
       "0                -29.441985                 21.866637   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_7  摘要信息_交易对手客户编号_countvec_8  \\\n",
       "0                 -0.628486                  0.539294   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_9  客户编号_交易代码_渠道代码_w2v_0  客户编号_交易代码_渠道代码_w2v_1  \\\n",
       "0                 19.930739              0.929519              0.041919   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_2  客户编号_交易代码_渠道代码_w2v_3  客户编号_交易代码_渠道代码_w2v_4  \\\n",
       "0             -0.318774             -0.035561              0.227936   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_5  客户编号_交易代码_渠道代码_w2v_6  客户编号_交易代码_渠道代码_w2v_7  \\\n",
       "0              0.420671              0.235241             -0.116549   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_0  客户编号_交易代码_摘要信息_w2v_1  客户编号_交易代码_摘要信息_w2v_2  \\\n",
       "0             -0.704036             -1.022647              -1.33967   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_3  客户编号_交易代码_摘要信息_w2v_4  客户编号_交易代码_摘要信息_w2v_5  \\\n",
       "0               0.55187              2.395266             -0.682805   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_6  客户编号_交易代码_摘要信息_w2v_7  客户编号_交易代码_交易对手客户编号_w2v_0  \\\n",
       "0              0.417848             -0.431105                  1.080258   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_1  客户编号_交易代码_交易对手客户编号_w2v_2  \\\n",
       "0                  0.155055                 -0.110294   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_3  客户编号_交易代码_交易对手客户编号_w2v_4  \\\n",
       "0                 -1.295262                  0.070553   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_5  客户编号_交易代码_交易对手客户编号_w2v_6  \\\n",
       "0                  1.742599                 -1.454264   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_7  客户编号_渠道代码_摘要信息_w2v_0  客户编号_渠道代码_摘要信息_w2v_1  \\\n",
       "0                 -0.450553               0.03435             -1.680509   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_2  客户编号_渠道代码_摘要信息_w2v_3  客户编号_渠道代码_摘要信息_w2v_4  \\\n",
       "0             -0.781761             -0.693793             -0.242717   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_5  客户编号_渠道代码_摘要信息_w2v_6  客户编号_渠道代码_摘要信息_w2v_7  \\\n",
       "0              0.461331             -2.490363              1.221877   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_0  客户编号_渠道代码_交易对手客户编号_w2v_1  \\\n",
       "0                  0.030599                 -0.467518   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_2  客户编号_渠道代码_交易对手客户编号_w2v_3  \\\n",
       "0                 -1.226284                   2.05362   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_4  客户编号_渠道代码_交易对手客户编号_w2v_5  \\\n",
       "0                  0.080761                   0.92395   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_6  客户编号_渠道代码_交易对手客户编号_w2v_7  \\\n",
       "0                 -1.016611                 -0.644857   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_0  客户编号_摘要信息_交易对手客户编号_w2v_1  \\\n",
       "0                  0.364926                 -2.130036   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_2  客户编号_摘要信息_交易对手客户编号_w2v_3  \\\n",
       "0                  1.586251                  0.282562   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_4  客户编号_摘要信息_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.388937                  1.244417   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_6  客户编号_摘要信息_交易对手客户编号_w2v_7  \n",
       "0                  0.011919                  1.579562  \n",
       "\n",
       "[1 rows x 607 columns]"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_hyy = basic_info.merge(basic_text, how = 'left', on = '客户编号')\n",
    "feature_hyy = feature_hyy.merge(fncl_tr_dtal_info_all, how = 'left', on = '客户编号')\n",
    "feature_hyy = feature_hyy.merge(fncl_tr_dtal_text, how = 'left', on = '客户编号')\n",
    "print(feature_hyy.shape)\n",
    "feature_hyy.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "45698487-4c69-4fe8-b3e0-ff027e7425a5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:18:47.996049Z",
     "iopub.status.busy": "2024-11-11T03:18:47.995504Z",
     "iopub.status.idle": "2024-11-11T03:18:48.096411Z",
     "msg_id": "5f15d2fc-895d-468d-b3e7-f8db5884d3ec",
     "shell.execute_reply": "2024-11-11T03:18:48.095650Z",
     "shell.execute_reply.started": "2024-11-11T03:18:47.996016Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 1)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b1d244a25a82adb7beafe33fe971402c</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>85b1ab1270516d2ebe21ed00c6abbf27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ef194610bdbecdea9af3cc23bceba8b2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1220f9592fdd0b3fa9bbbd90e6d69d84</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9\n",
       "1  b1d244a25a82adb7beafe33fe971402c\n",
       "2  85b1ab1270516d2ebe21ed00c6abbf27\n",
       "3  ef194610bdbecdea9af3cc23bceba8b2\n",
       "4  1220f9592fdd0b3fa9bbbd90e6d69d84"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_name = 'XW_ENTINFO_TARGET'\n",
    "TARGET = get_data(file_name, num_rows=None)\n",
    "TARGET = TARGET.drop(['数据日期', 'FLAG', 'is_train'], axis = 1)\n",
    "print(TARGET.shape)\n",
    "TARGET.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "ca522884-cbb3-4612-af8e-d08f6e3a3c52",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:18:48.195132Z",
     "iopub.status.busy": "2024-11-11T03:18:48.194555Z",
     "iopub.status.idle": "2024-11-11T03:18:49.539065Z",
     "msg_id": "6c936608-d83f-40ad-b2f8-0b875ca98a2f",
     "shell.execute_reply": "2024-11-11T03:18:49.538281Z",
     "shell.execute_reply.started": "2024-11-11T03:18:48.195103Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>客户编号</th>\n",
       "      <th>注册资本</th>\n",
       "      <th>是否长期经营</th>\n",
       "      <th>经营成立时间是否相等</th>\n",
       "      <th>注册资金过小</th>\n",
       "      <th>经营是否已过期</th>\n",
       "      <th>剩余经营天数_天</th>\n",
       "      <th>已经营天数_天</th>\n",
       "      <th>当期经营期限总天数_天</th>\n",
       "      <th>自成立经营期限总天数_天</th>\n",
       "      <th>已成立天数_天</th>\n",
       "      <th>再次经营_天</th>\n",
       "      <th>剩余经营天数_月</th>\n",
       "      <th>已经营天数_月</th>\n",
       "      <th>当期经营期限总天数_月</th>\n",
       "      <th>自成立经营期限总天数_月</th>\n",
       "      <th>已成立天数_月</th>\n",
       "      <th>再次经营_月</th>\n",
       "      <th>剩余经营天数_年</th>\n",
       "      <th>已经营天数_年</th>\n",
       "      <th>当期经营期限总天数_年</th>\n",
       "      <th>自成立经营期限总天数_年</th>\n",
       "      <th>已成立天数_年</th>\n",
       "      <th>再次经营_年</th>\n",
       "      <th>经营状态_编码</th>\n",
       "      <th>企业（机构）类型编码_频数是否前10</th>\n",
       "      <th>企业（机构）类型编码_频数是否后20</th>\n",
       "      <th>企业（机构）类型编码_是否频数最高2类</th>\n",
       "      <th>所在省份编码_频数是否前5</th>\n",
       "      <th>所在省份编码_频数是否后5</th>\n",
       "      <th>企业（机构）类型编码_是否坏率最高2类</th>\n",
       "      <th>国民经济行业代码_频数是否前5</th>\n",
       "      <th>国民经济行业代码_频数是否后230</th>\n",
       "      <th>企业（机构）类型编码_分箱</th>\n",
       "      <th>所在省份编码_分箱</th>\n",
       "      <th>国民经济行业代码_分箱</th>\n",
       "      <th>法定代表人相关企业个数</th>\n",
       "      <th>法人涉足企业类型</th>\n",
       "      <th>法人涉足国民经济行业代码</th>\n",
       "      <th>法人跨省个数</th>\n",
       "      <th>法定代表人_tfidf_0</th>\n",
       "      <th>法定代表人_tfidf_1</th>\n",
       "      <th>法定代表人_tfidf_2</th>\n",
       "      <th>法定代表人_tfidf_3</th>\n",
       "      <th>法定代表人_tfidf_4</th>\n",
       "      <th>法定代表人_tfidf_5</th>\n",
       "      <th>法定代表人_tfidf_6</th>\n",
       "      <th>法定代表人_tfidf_7</th>\n",
       "      <th>法定代表人_tfidf_8</th>\n",
       "      <th>法定代表人_tfidf_9</th>\n",
       "      <th>法定代表人_countvec_0</th>\n",
       "      <th>法定代表人_countvec_1</th>\n",
       "      <th>法定代表人_countvec_2</th>\n",
       "      <th>法定代表人_countvec_3</th>\n",
       "      <th>法定代表人_countvec_4</th>\n",
       "      <th>法定代表人_countvec_5</th>\n",
       "      <th>法定代表人_countvec_6</th>\n",
       "      <th>法定代表人_countvec_7</th>\n",
       "      <th>法定代表人_countvec_8</th>\n",
       "      <th>法定代表人_countvec_9</th>\n",
       "      <th>企业（机构）类型编码_tfidf_0</th>\n",
       "      <th>企业（机构）类型编码_tfidf_1</th>\n",
       "      <th>企业（机构）类型编码_tfidf_2</th>\n",
       "      <th>企业（机构）类型编码_tfidf_3</th>\n",
       "      <th>企业（机构）类型编码_tfidf_4</th>\n",
       "      <th>企业（机构）类型编码_tfidf_5</th>\n",
       "      <th>企业（机构）类型编码_tfidf_6</th>\n",
       "      <th>企业（机构）类型编码_tfidf_7</th>\n",
       "      <th>企业（机构）类型编码_tfidf_8</th>\n",
       "      <th>企业（机构）类型编码_tfidf_9</th>\n",
       "      <th>企业（机构）类型编码_countvec_0</th>\n",
       "      <th>企业（机构）类型编码_countvec_1</th>\n",
       "      <th>企业（机构）类型编码_countvec_2</th>\n",
       "      <th>企业（机构）类型编码_countvec_3</th>\n",
       "      <th>企业（机构）类型编码_countvec_4</th>\n",
       "      <th>企业（机构）类型编码_countvec_5</th>\n",
       "      <th>企业（机构）类型编码_countvec_6</th>\n",
       "      <th>企业（机构）类型编码_countvec_7</th>\n",
       "      <th>企业（机构）类型编码_countvec_8</th>\n",
       "      <th>企业（机构）类型编码_countvec_9</th>\n",
       "      <th>所在省份编码_tfidf_0</th>\n",
       "      <th>所在省份编码_tfidf_1</th>\n",
       "      <th>所在省份编码_tfidf_2</th>\n",
       "      <th>所在省份编码_tfidf_3</th>\n",
       "      <th>所在省份编码_tfidf_4</th>\n",
       "      <th>所在省份编码_tfidf_5</th>\n",
       "      <th>所在省份编码_tfidf_6</th>\n",
       "      <th>所在省份编码_tfidf_7</th>\n",
       "      <th>所在省份编码_tfidf_8</th>\n",
       "      <th>所在省份编码_tfidf_9</th>\n",
       "      <th>所在省份编码_countvec_0</th>\n",
       "      <th>所在省份编码_countvec_1</th>\n",
       "      <th>所在省份编码_countvec_2</th>\n",
       "      <th>所在省份编码_countvec_3</th>\n",
       "      <th>所在省份编码_countvec_4</th>\n",
       "      <th>所在省份编码_countvec_5</th>\n",
       "      <th>所在省份编码_countvec_6</th>\n",
       "      <th>所在省份编码_countvec_7</th>\n",
       "      <th>所在省份编码_countvec_8</th>\n",
       "      <th>所在省份编码_countvec_9</th>\n",
       "      <th>...</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_8</th>\n",
       "      <th>渠道代码_摘要信息_tfidf_9</th>\n",
       "      <th>渠道代码_摘要信息_countvec_0</th>\n",
       "      <th>渠道代码_摘要信息_countvec_1</th>\n",
       "      <th>渠道代码_摘要信息_countvec_2</th>\n",
       "      <th>渠道代码_摘要信息_countvec_3</th>\n",
       "      <th>渠道代码_摘要信息_countvec_4</th>\n",
       "      <th>渠道代码_摘要信息_countvec_5</th>\n",
       "      <th>渠道代码_摘要信息_countvec_6</th>\n",
       "      <th>渠道代码_摘要信息_countvec_7</th>\n",
       "      <th>渠道代码_摘要信息_countvec_8</th>\n",
       "      <th>渠道代码_摘要信息_countvec_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_tfidf_9</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_0</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_1</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_2</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_3</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_4</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_5</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_6</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_7</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_8</th>\n",
       "      <th>渠道代码_交易对手客户编号_countvec_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_tfidf_9</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_0</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_1</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_2</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_3</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_4</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_5</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_6</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_7</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_8</th>\n",
       "      <th>摘要信息_交易对手客户编号_countvec_9</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_0</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_1</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_2</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_3</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_4</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_5</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_6</th>\n",
       "      <th>客户编号_交易代码_渠道代码_w2v_7</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_交易代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_交易代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_摘要信息_w2v_7</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_渠道代码_交易对手客户编号_w2v_7</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_0</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_1</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_2</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_3</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_4</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_5</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_6</th>\n",
       "      <th>客户编号_摘要信息_交易对手客户编号_w2v_7</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>158a8d99bec2a2b652a6de45a2b52ec9</td>\n",
       "      <td>690521.61</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5655</td>\n",
       "      <td>1644</td>\n",
       "      <td>7299</td>\n",
       "      <td>7299</td>\n",
       "      <td>1644</td>\n",
       "      <td>0</td>\n",
       "      <td>186</td>\n",
       "      <td>54</td>\n",
       "      <td>240</td>\n",
       "      <td>240</td>\n",
       "      <td>54</td>\n",
       "      <td>0</td>\n",
       "      <td>15.500000</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3.112701e-32</td>\n",
       "      <td>-3.182684e-31</td>\n",
       "      <td>1.459411e-30</td>\n",
       "      <td>-3.919896e-30</td>\n",
       "      <td>-1.098859e-30</td>\n",
       "      <td>-2.575129e-30</td>\n",
       "      <td>1.086710e-29</td>\n",
       "      <td>-1.080662e-29</td>\n",
       "      <td>-1.030773e-29</td>\n",
       "      <td>-6.907220e-30</td>\n",
       "      <td>3.112701e-32</td>\n",
       "      <td>-3.182684e-31</td>\n",
       "      <td>1.459411e-30</td>\n",
       "      <td>-3.919896e-30</td>\n",
       "      <td>-1.098859e-30</td>\n",
       "      <td>-2.575129e-30</td>\n",
       "      <td>1.086710e-29</td>\n",
       "      <td>-1.080662e-29</td>\n",
       "      <td>-1.030773e-29</td>\n",
       "      <td>-6.907220e-30</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-2.426068e-17</td>\n",
       "      <td>-1.053226e-21</td>\n",
       "      <td>-1.867483e-28</td>\n",
       "      <td>9.566782e-33</td>\n",
       "      <td>1.061066e-34</td>\n",
       "      <td>6.700930e-34</td>\n",
       "      <td>1.710763e-32</td>\n",
       "      <td>9.444267e-36</td>\n",
       "      <td>-9.671025e-36</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.034319</td>\n",
       "      <td>-0.002089</td>\n",
       "      <td>1.858848</td>\n",
       "      <td>8.185632</td>\n",
       "      <td>-2.171361</td>\n",
       "      <td>-16.286702</td>\n",
       "      <td>0.072092</td>\n",
       "      <td>-0.356895</td>\n",
       "      <td>0.006055</td>\n",
       "      <td>1.709881</td>\n",
       "      <td>-0.224256</td>\n",
       "      <td>-0.140932</td>\n",
       "      <td>0.747345</td>\n",
       "      <td>0.084474</td>\n",
       "      <td>-0.118081</td>\n",
       "      <td>-0.087905</td>\n",
       "      <td>-0.029901</td>\n",
       "      <td>-0.052425</td>\n",
       "      <td>0.091839</td>\n",
       "      <td>0.220949</td>\n",
       "      <td>-0.014612</td>\n",
       "      <td>-0.045959</td>\n",
       "      <td>0.056557</td>\n",
       "      <td>1.864093</td>\n",
       "      <td>6.557424</td>\n",
       "      <td>-0.155376</td>\n",
       "      <td>1.547072</td>\n",
       "      <td>0.360551</td>\n",
       "      <td>14.312553</td>\n",
       "      <td>0.776877</td>\n",
       "      <td>-2.122171</td>\n",
       "      <td>-0.223114</td>\n",
       "      <td>0.419106</td>\n",
       "      <td>0.016600</td>\n",
       "      <td>-0.052080</td>\n",
       "      <td>-0.023534</td>\n",
       "      <td>-0.013723</td>\n",
       "      <td>-0.015781</td>\n",
       "      <td>-0.011465</td>\n",
       "      <td>0.000263</td>\n",
       "      <td>0.006329</td>\n",
       "      <td>-0.008315</td>\n",
       "      <td>0.004686</td>\n",
       "      <td>7.081610</td>\n",
       "      <td>-0.203347</td>\n",
       "      <td>15.557270</td>\n",
       "      <td>-0.294090</td>\n",
       "      <td>-0.416516</td>\n",
       "      <td>-0.169363</td>\n",
       "      <td>-0.083976</td>\n",
       "      <td>-0.020281</td>\n",
       "      <td>-0.315465</td>\n",
       "      <td>1.216363</td>\n",
       "      <td>0.387643</td>\n",
       "      <td>0.041235</td>\n",
       "      <td>0.122863</td>\n",
       "      <td>0.089642</td>\n",
       "      <td>0.360907</td>\n",
       "      <td>0.173583</td>\n",
       "      <td>-0.064026</td>\n",
       "      <td>-1.275527</td>\n",
       "      <td>-2.232501</td>\n",
       "      <td>-1.986748</td>\n",
       "      <td>0.746150</td>\n",
       "      <td>1.797314</td>\n",
       "      <td>0.352396</td>\n",
       "      <td>-1.015768</td>\n",
       "      <td>0.462867</td>\n",
       "      <td>0.566154</td>\n",
       "      <td>-0.368129</td>\n",
       "      <td>-0.174086</td>\n",
       "      <td>-1.619980</td>\n",
       "      <td>-0.334377</td>\n",
       "      <td>1.329901</td>\n",
       "      <td>-1.552544</td>\n",
       "      <td>-0.683254</td>\n",
       "      <td>0.606885</td>\n",
       "      <td>-0.580380</td>\n",
       "      <td>0.436700</td>\n",
       "      <td>-1.445817</td>\n",
       "      <td>-0.862692</td>\n",
       "      <td>-0.516107</td>\n",
       "      <td>-3.854006</td>\n",
       "      <td>1.488673</td>\n",
       "      <td>0.283305</td>\n",
       "      <td>-0.758342</td>\n",
       "      <td>-1.447038</td>\n",
       "      <td>2.477799</td>\n",
       "      <td>-0.113421</td>\n",
       "      <td>0.460692</td>\n",
       "      <td>-1.110022</td>\n",
       "      <td>-0.269635</td>\n",
       "      <td>0.127737</td>\n",
       "      <td>-1.632896</td>\n",
       "      <td>2.295154</td>\n",
       "      <td>0.965088</td>\n",
       "      <td>-0.534077</td>\n",
       "      <td>2.403713</td>\n",
       "      <td>-0.769188</td>\n",
       "      <td>2.437117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>b1d244a25a82adb7beafe33fe971402c</td>\n",
       "      <td>345266.51</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>364059</td>\n",
       "      <td>1748</td>\n",
       "      <td>365807</td>\n",
       "      <td>365807</td>\n",
       "      <td>1748</td>\n",
       "      <td>0</td>\n",
       "      <td>11969</td>\n",
       "      <td>57</td>\n",
       "      <td>12026</td>\n",
       "      <td>12026</td>\n",
       "      <td>57</td>\n",
       "      <td>0</td>\n",
       "      <td>997.416667</td>\n",
       "      <td>4.750000</td>\n",
       "      <td>1002.166667</td>\n",
       "      <td>1002.166667</td>\n",
       "      <td>4.750000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-1.410392e-32</td>\n",
       "      <td>1.273339e-31</td>\n",
       "      <td>-8.141020e-32</td>\n",
       "      <td>2.163897e-30</td>\n",
       "      <td>1.331409e-31</td>\n",
       "      <td>-6.148483e-31</td>\n",
       "      <td>1.653255e-30</td>\n",
       "      <td>2.443203e-30</td>\n",
       "      <td>7.139733e-31</td>\n",
       "      <td>1.090951e-29</td>\n",
       "      <td>-1.410392e-32</td>\n",
       "      <td>1.273339e-31</td>\n",
       "      <td>-8.141020e-32</td>\n",
       "      <td>2.163897e-30</td>\n",
       "      <td>1.331409e-31</td>\n",
       "      <td>-6.148483e-31</td>\n",
       "      <td>1.653255e-30</td>\n",
       "      <td>2.443203e-30</td>\n",
       "      <td>7.139733e-31</td>\n",
       "      <td>1.090951e-29</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>1.191350e-19</td>\n",
       "      <td>8.202335e-20</td>\n",
       "      <td>-7.986753e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.931267e-13</td>\n",
       "      <td>1.685272e-14</td>\n",
       "      <td>2.963131e-15</td>\n",
       "      <td>9.738470e-20</td>\n",
       "      <td>2.085465e-17</td>\n",
       "      <td>-3.055759e-19</td>\n",
       "      <td>1.191350e-19</td>\n",
       "      <td>8.202335e-20</td>\n",
       "      <td>-7.986753e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.931267e-13</td>\n",
       "      <td>1.685272e-14</td>\n",
       "      <td>2.963131e-15</td>\n",
       "      <td>9.738470e-20</td>\n",
       "      <td>2.085465e-17</td>\n",
       "      <td>-3.055759e-19</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.006221</td>\n",
       "      <td>0.001555</td>\n",
       "      <td>0.620219</td>\n",
       "      <td>2.537268</td>\n",
       "      <td>-0.674693</td>\n",
       "      <td>-5.360068</td>\n",
       "      <td>0.023221</td>\n",
       "      <td>-0.122647</td>\n",
       "      <td>0.001265</td>\n",
       "      <td>0.447589</td>\n",
       "      <td>-0.072313</td>\n",
       "      <td>-0.045287</td>\n",
       "      <td>0.954345</td>\n",
       "      <td>-0.266194</td>\n",
       "      <td>-0.054113</td>\n",
       "      <td>0.095159</td>\n",
       "      <td>-0.064070</td>\n",
       "      <td>-0.016413</td>\n",
       "      <td>-0.031555</td>\n",
       "      <td>-0.021995</td>\n",
       "      <td>-0.002435</td>\n",
       "      <td>0.001869</td>\n",
       "      <td>0.011952</td>\n",
       "      <td>0.253371</td>\n",
       "      <td>0.960214</td>\n",
       "      <td>-0.015594</td>\n",
       "      <td>0.776399</td>\n",
       "      <td>0.343235</td>\n",
       "      <td>5.580273</td>\n",
       "      <td>0.536097</td>\n",
       "      <td>-0.743166</td>\n",
       "      <td>0.109909</td>\n",
       "      <td>0.927692</td>\n",
       "      <td>-0.086164</td>\n",
       "      <td>-0.094302</td>\n",
       "      <td>-0.032104</td>\n",
       "      <td>-0.012549</td>\n",
       "      <td>0.026771</td>\n",
       "      <td>0.017189</td>\n",
       "      <td>-0.006457</td>\n",
       "      <td>-0.022007</td>\n",
       "      <td>-0.029549</td>\n",
       "      <td>0.002460</td>\n",
       "      <td>1.642961</td>\n",
       "      <td>-0.045107</td>\n",
       "      <td>4.847877</td>\n",
       "      <td>-0.090691</td>\n",
       "      <td>-0.832567</td>\n",
       "      <td>-0.031741</td>\n",
       "      <td>-0.027015</td>\n",
       "      <td>0.012764</td>\n",
       "      <td>-0.088627</td>\n",
       "      <td>1.044738</td>\n",
       "      <td>0.462644</td>\n",
       "      <td>0.349524</td>\n",
       "      <td>0.223502</td>\n",
       "      <td>0.210997</td>\n",
       "      <td>0.544947</td>\n",
       "      <td>0.017120</td>\n",
       "      <td>-0.043680</td>\n",
       "      <td>-1.613089</td>\n",
       "      <td>-2.583142</td>\n",
       "      <td>-2.152398</td>\n",
       "      <td>0.150843</td>\n",
       "      <td>1.850057</td>\n",
       "      <td>-0.022702</td>\n",
       "      <td>-1.284593</td>\n",
       "      <td>0.659154</td>\n",
       "      <td>0.069912</td>\n",
       "      <td>-0.546371</td>\n",
       "      <td>0.007978</td>\n",
       "      <td>-1.766838</td>\n",
       "      <td>-0.242589</td>\n",
       "      <td>1.300891</td>\n",
       "      <td>-1.232311</td>\n",
       "      <td>-0.658328</td>\n",
       "      <td>0.748789</td>\n",
       "      <td>-0.674275</td>\n",
       "      <td>0.760463</td>\n",
       "      <td>-1.553597</td>\n",
       "      <td>-1.171873</td>\n",
       "      <td>-0.458338</td>\n",
       "      <td>-4.781826</td>\n",
       "      <td>1.404563</td>\n",
       "      <td>0.476772</td>\n",
       "      <td>-0.980687</td>\n",
       "      <td>-1.494675</td>\n",
       "      <td>2.663459</td>\n",
       "      <td>-0.016259</td>\n",
       "      <td>0.320416</td>\n",
       "      <td>-1.267092</td>\n",
       "      <td>-0.348978</td>\n",
       "      <td>-0.024728</td>\n",
       "      <td>-1.914073</td>\n",
       "      <td>2.964499</td>\n",
       "      <td>1.383257</td>\n",
       "      <td>-0.749559</td>\n",
       "      <td>3.366637</td>\n",
       "      <td>-0.936711</td>\n",
       "      <td>2.617786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>85b1ab1270516d2ebe21ed00c6abbf27</td>\n",
       "      <td>690521.61</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9818</td>\n",
       "      <td>1130</td>\n",
       "      <td>10948</td>\n",
       "      <td>10948</td>\n",
       "      <td>1130</td>\n",
       "      <td>0</td>\n",
       "      <td>323</td>\n",
       "      <td>37</td>\n",
       "      <td>360</td>\n",
       "      <td>360</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>26.916667</td>\n",
       "      <td>3.083333</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>3.083333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2.116352e-32</td>\n",
       "      <td>-1.864457e-31</td>\n",
       "      <td>6.685384e-31</td>\n",
       "      <td>-4.770473e-30</td>\n",
       "      <td>-3.592343e-31</td>\n",
       "      <td>7.272122e-31</td>\n",
       "      <td>3.420462e-30</td>\n",
       "      <td>-5.808280e-30</td>\n",
       "      <td>-4.844511e-30</td>\n",
       "      <td>-1.512985e-31</td>\n",
       "      <td>2.116352e-32</td>\n",
       "      <td>-1.864457e-31</td>\n",
       "      <td>6.685384e-31</td>\n",
       "      <td>-4.770473e-30</td>\n",
       "      <td>-3.592343e-31</td>\n",
       "      <td>7.272122e-31</td>\n",
       "      <td>3.420462e-30</td>\n",
       "      <td>-5.808280e-30</td>\n",
       "      <td>-4.844511e-30</td>\n",
       "      <td>-1.512985e-31</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>8.890848e-20</td>\n",
       "      <td>5.167360e-20</td>\n",
       "      <td>3.042293e-21</td>\n",
       "      <td>1.043209e-19</td>\n",
       "      <td>1.619241e-18</td>\n",
       "      <td>1.076051e-17</td>\n",
       "      <td>5.214132e-17</td>\n",
       "      <td>-3.663833e-16</td>\n",
       "      <td>-2.029394e-15</td>\n",
       "      <td>1.134680e-13</td>\n",
       "      <td>...</td>\n",
       "      <td>0.346344</td>\n",
       "      <td>-0.020487</td>\n",
       "      <td>1.355179</td>\n",
       "      <td>16.244522</td>\n",
       "      <td>-3.977844</td>\n",
       "      <td>4.805528</td>\n",
       "      <td>0.158932</td>\n",
       "      <td>-0.000969</td>\n",
       "      <td>-0.142467</td>\n",
       "      <td>-7.910744</td>\n",
       "      <td>-0.283739</td>\n",
       "      <td>1.281801</td>\n",
       "      <td>0.038409</td>\n",
       "      <td>0.071916</td>\n",
       "      <td>0.027822</td>\n",
       "      <td>0.008290</td>\n",
       "      <td>0.023041</td>\n",
       "      <td>0.019194</td>\n",
       "      <td>0.000861</td>\n",
       "      <td>-0.004176</td>\n",
       "      <td>-0.003173</td>\n",
       "      <td>-0.004939</td>\n",
       "      <td>0.142981</td>\n",
       "      <td>6.949969</td>\n",
       "      <td>10.172628</td>\n",
       "      <td>-0.354891</td>\n",
       "      <td>0.547250</td>\n",
       "      <td>3.946043</td>\n",
       "      <td>0.827492</td>\n",
       "      <td>3.289976</td>\n",
       "      <td>-0.392159</td>\n",
       "      <td>-2.222795</td>\n",
       "      <td>0.023494</td>\n",
       "      <td>0.102962</td>\n",
       "      <td>0.020178</td>\n",
       "      <td>-0.002688</td>\n",
       "      <td>-0.000106</td>\n",
       "      <td>-0.001943</td>\n",
       "      <td>-0.004486</td>\n",
       "      <td>-0.005001</td>\n",
       "      <td>-0.004576</td>\n",
       "      <td>-0.002313</td>\n",
       "      <td>0.002035</td>\n",
       "      <td>11.582282</td>\n",
       "      <td>-0.467038</td>\n",
       "      <td>-1.174324</td>\n",
       "      <td>-0.017412</td>\n",
       "      <td>-1.006417</td>\n",
       "      <td>-0.015536</td>\n",
       "      <td>-0.011788</td>\n",
       "      <td>0.019163</td>\n",
       "      <td>0.003630</td>\n",
       "      <td>0.826676</td>\n",
       "      <td>0.204695</td>\n",
       "      <td>-0.542103</td>\n",
       "      <td>-0.000163</td>\n",
       "      <td>0.180474</td>\n",
       "      <td>0.180687</td>\n",
       "      <td>-0.122410</td>\n",
       "      <td>0.016429</td>\n",
       "      <td>-1.021788</td>\n",
       "      <td>-0.272600</td>\n",
       "      <td>-1.789511</td>\n",
       "      <td>0.565752</td>\n",
       "      <td>2.424536</td>\n",
       "      <td>-0.423115</td>\n",
       "      <td>-0.470062</td>\n",
       "      <td>-0.397190</td>\n",
       "      <td>1.329337</td>\n",
       "      <td>0.479260</td>\n",
       "      <td>0.200958</td>\n",
       "      <td>-0.848595</td>\n",
       "      <td>-0.169067</td>\n",
       "      <td>1.447362</td>\n",
       "      <td>-1.504765</td>\n",
       "      <td>-0.733271</td>\n",
       "      <td>0.182754</td>\n",
       "      <td>-1.634504</td>\n",
       "      <td>-0.224778</td>\n",
       "      <td>-1.601706</td>\n",
       "      <td>0.279196</td>\n",
       "      <td>-0.180027</td>\n",
       "      <td>-2.420538</td>\n",
       "      <td>1.218689</td>\n",
       "      <td>-0.436527</td>\n",
       "      <td>-0.123236</td>\n",
       "      <td>-1.626440</td>\n",
       "      <td>1.532142</td>\n",
       "      <td>-0.493065</td>\n",
       "      <td>1.133597</td>\n",
       "      <td>-0.741304</td>\n",
       "      <td>-0.691263</td>\n",
       "      <td>0.222341</td>\n",
       "      <td>-1.410666</td>\n",
       "      <td>1.065484</td>\n",
       "      <td>-0.013875</td>\n",
       "      <td>0.627372</td>\n",
       "      <td>1.665259</td>\n",
       "      <td>-0.207545</td>\n",
       "      <td>0.923987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ef194610bdbecdea9af3cc23bceba8b2</td>\n",
       "      <td>1312010.26</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>364059</td>\n",
       "      <td>4140</td>\n",
       "      <td>368199</td>\n",
       "      <td>368199</td>\n",
       "      <td>4140</td>\n",
       "      <td>0</td>\n",
       "      <td>11969</td>\n",
       "      <td>136</td>\n",
       "      <td>12105</td>\n",
       "      <td>12105</td>\n",
       "      <td>136</td>\n",
       "      <td>0</td>\n",
       "      <td>997.416667</td>\n",
       "      <td>11.333333</td>\n",
       "      <td>1008.750000</td>\n",
       "      <td>1008.750000</td>\n",
       "      <td>11.333333</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>-1.676322e-32</td>\n",
       "      <td>3.206701e-31</td>\n",
       "      <td>-9.128244e-31</td>\n",
       "      <td>2.888886e-30</td>\n",
       "      <td>-4.781771e-31</td>\n",
       "      <td>2.151488e-30</td>\n",
       "      <td>2.749349e-30</td>\n",
       "      <td>4.982391e-30</td>\n",
       "      <td>7.240230e-30</td>\n",
       "      <td>1.619470e-29</td>\n",
       "      <td>-1.676322e-32</td>\n",
       "      <td>3.206701e-31</td>\n",
       "      <td>-9.128244e-31</td>\n",
       "      <td>2.888886e-30</td>\n",
       "      <td>-4.781771e-31</td>\n",
       "      <td>2.151488e-30</td>\n",
       "      <td>2.749349e-30</td>\n",
       "      <td>4.982391e-30</td>\n",
       "      <td>7.240230e-30</td>\n",
       "      <td>1.619470e-29</td>\n",
       "      <td>-6.700930e-34</td>\n",
       "      <td>4.599464e-29</td>\n",
       "      <td>-1.806836e-27</td>\n",
       "      <td>2.730483e-21</td>\n",
       "      <td>5.325693e-16</td>\n",
       "      <td>8.374388e-16</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-5.751836e-15</td>\n",
       "      <td>1.363924e-16</td>\n",
       "      <td>8.418867e-17</td>\n",
       "      <td>-6.700930e-34</td>\n",
       "      <td>4.599464e-29</td>\n",
       "      <td>-1.806836e-27</td>\n",
       "      <td>2.730483e-21</td>\n",
       "      <td>5.325693e-16</td>\n",
       "      <td>8.374388e-16</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>-5.751836e-15</td>\n",
       "      <td>1.363924e-16</td>\n",
       "      <td>8.418867e-17</td>\n",
       "      <td>-3.234077e-12</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>8.793772e-17</td>\n",
       "      <td>-8.445156e-20</td>\n",
       "      <td>4.161935e-20</td>\n",
       "      <td>-1.638265e-19</td>\n",
       "      <td>9.081989e-19</td>\n",
       "      <td>-1.163952e-17</td>\n",
       "      <td>4.433245e-18</td>\n",
       "      <td>-1.508798e-19</td>\n",
       "      <td>-3.234077e-12</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>8.793772e-17</td>\n",
       "      <td>-8.445156e-20</td>\n",
       "      <td>4.161935e-20</td>\n",
       "      <td>-1.638265e-19</td>\n",
       "      <td>9.081989e-19</td>\n",
       "      <td>-1.163952e-17</td>\n",
       "      <td>4.433245e-18</td>\n",
       "      <td>-1.508798e-19</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.005117</td>\n",
       "      <td>-0.021882</td>\n",
       "      <td>26.182480</td>\n",
       "      <td>17.705948</td>\n",
       "      <td>-4.749788</td>\n",
       "      <td>-37.287020</td>\n",
       "      <td>-1.234869</td>\n",
       "      <td>-0.770082</td>\n",
       "      <td>-0.150895</td>\n",
       "      <td>-1.078964</td>\n",
       "      <td>-0.585895</td>\n",
       "      <td>0.642842</td>\n",
       "      <td>0.326897</td>\n",
       "      <td>0.360825</td>\n",
       "      <td>0.040931</td>\n",
       "      <td>-0.023629</td>\n",
       "      <td>-0.147423</td>\n",
       "      <td>0.009431</td>\n",
       "      <td>-0.001184</td>\n",
       "      <td>0.027686</td>\n",
       "      <td>-0.022390</td>\n",
       "      <td>-0.055930</td>\n",
       "      <td>0.634773</td>\n",
       "      <td>21.849386</td>\n",
       "      <td>46.912099</td>\n",
       "      <td>-1.122035</td>\n",
       "      <td>3.347970</td>\n",
       "      <td>-0.331290</td>\n",
       "      <td>30.115275</td>\n",
       "      <td>-11.977400</td>\n",
       "      <td>-8.193690</td>\n",
       "      <td>-7.806300</td>\n",
       "      <td>0.273279</td>\n",
       "      <td>0.036439</td>\n",
       "      <td>0.008137</td>\n",
       "      <td>0.145022</td>\n",
       "      <td>0.088603</td>\n",
       "      <td>-0.013096</td>\n",
       "      <td>-0.017238</td>\n",
       "      <td>-0.020591</td>\n",
       "      <td>-0.023771</td>\n",
       "      <td>-0.007787</td>\n",
       "      <td>0.021611</td>\n",
       "      <td>20.482397</td>\n",
       "      <td>-0.566625</td>\n",
       "      <td>50.022002</td>\n",
       "      <td>-0.820114</td>\n",
       "      <td>-2.476982</td>\n",
       "      <td>-0.255035</td>\n",
       "      <td>-0.253709</td>\n",
       "      <td>-0.030940</td>\n",
       "      <td>-0.391722</td>\n",
       "      <td>1.085397</td>\n",
       "      <td>0.202935</td>\n",
       "      <td>-0.169672</td>\n",
       "      <td>0.129676</td>\n",
       "      <td>0.259515</td>\n",
       "      <td>0.462957</td>\n",
       "      <td>0.130256</td>\n",
       "      <td>-0.305817</td>\n",
       "      <td>-1.278671</td>\n",
       "      <td>-1.846682</td>\n",
       "      <td>-1.628390</td>\n",
       "      <td>0.624487</td>\n",
       "      <td>1.518664</td>\n",
       "      <td>0.057439</td>\n",
       "      <td>-0.598697</td>\n",
       "      <td>0.338764</td>\n",
       "      <td>0.698881</td>\n",
       "      <td>0.031054</td>\n",
       "      <td>-0.145838</td>\n",
       "      <td>-1.469987</td>\n",
       "      <td>-0.148904</td>\n",
       "      <td>1.424731</td>\n",
       "      <td>-1.489185</td>\n",
       "      <td>-0.453307</td>\n",
       "      <td>0.231479</td>\n",
       "      <td>-0.645183</td>\n",
       "      <td>0.041408</td>\n",
       "      <td>-0.725097</td>\n",
       "      <td>-0.169144</td>\n",
       "      <td>-0.441308</td>\n",
       "      <td>-2.946208</td>\n",
       "      <td>1.374965</td>\n",
       "      <td>0.065402</td>\n",
       "      <td>-0.540489</td>\n",
       "      <td>-1.105266</td>\n",
       "      <td>2.261206</td>\n",
       "      <td>0.090320</td>\n",
       "      <td>0.749933</td>\n",
       "      <td>-1.146302</td>\n",
       "      <td>-0.647657</td>\n",
       "      <td>-0.176236</td>\n",
       "      <td>-1.444758</td>\n",
       "      <td>1.883517</td>\n",
       "      <td>0.630231</td>\n",
       "      <td>-0.187371</td>\n",
       "      <td>1.957782</td>\n",
       "      <td>-0.172917</td>\n",
       "      <td>1.954081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1220f9592fdd0b3fa9bbbd90e6d69d84</td>\n",
       "      <td>6904.22</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>364059</td>\n",
       "      <td>1203</td>\n",
       "      <td>365262</td>\n",
       "      <td>365262</td>\n",
       "      <td>1203</td>\n",
       "      <td>0</td>\n",
       "      <td>11969</td>\n",
       "      <td>39</td>\n",
       "      <td>12008</td>\n",
       "      <td>12008</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>997.416667</td>\n",
       "      <td>3.250000</td>\n",
       "      <td>1000.666667</td>\n",
       "      <td>1000.666667</td>\n",
       "      <td>3.250000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2.565327e-32</td>\n",
       "      <td>-3.368057e-31</td>\n",
       "      <td>5.346743e-31</td>\n",
       "      <td>-4.084920e-30</td>\n",
       "      <td>-1.081447e-30</td>\n",
       "      <td>2.307360e-30</td>\n",
       "      <td>6.182635e-30</td>\n",
       "      <td>-7.799187e-30</td>\n",
       "      <td>-4.088520e-30</td>\n",
       "      <td>-1.147978e-29</td>\n",
       "      <td>2.565327e-32</td>\n",
       "      <td>-3.368057e-31</td>\n",
       "      <td>5.346743e-31</td>\n",
       "      <td>-4.084920e-30</td>\n",
       "      <td>-1.081447e-30</td>\n",
       "      <td>2.307360e-30</td>\n",
       "      <td>6.182635e-30</td>\n",
       "      <td>-7.799187e-30</td>\n",
       "      <td>-4.088520e-30</td>\n",
       "      <td>-1.147978e-29</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>2.426068e-17</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>1.346388e-16</td>\n",
       "      <td>2.266380e-23</td>\n",
       "      <td>-1.150903e-27</td>\n",
       "      <td>-1.622419e-29</td>\n",
       "      <td>-4.599464e-29</td>\n",
       "      <td>-1.176252e-27</td>\n",
       "      <td>-6.493528e-31</td>\n",
       "      <td>6.649440e-31</td>\n",
       "      <td>2.769857e-19</td>\n",
       "      <td>1.637761e-19</td>\n",
       "      <td>4.382888e-17</td>\n",
       "      <td>-1.687456e-14</td>\n",
       "      <td>5.085346e-13</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.153623e-13</td>\n",
       "      <td>1.701940e-15</td>\n",
       "      <td>-3.050288e-16</td>\n",
       "      <td>-5.825926e-18</td>\n",
       "      <td>2.769857e-19</td>\n",
       "      <td>1.637761e-19</td>\n",
       "      <td>4.382888e-17</td>\n",
       "      <td>-1.687456e-14</td>\n",
       "      <td>5.085346e-13</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>2.153623e-13</td>\n",
       "      <td>1.701940e-15</td>\n",
       "      <td>-3.050288e-16</td>\n",
       "      <td>-5.825926e-18</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.008447</td>\n",
       "      <td>-0.023078</td>\n",
       "      <td>1.445150</td>\n",
       "      <td>5.906150</td>\n",
       "      <td>-1.568768</td>\n",
       "      <td>-12.372115</td>\n",
       "      <td>0.050952</td>\n",
       "      <td>-0.286854</td>\n",
       "      <td>0.012423</td>\n",
       "      <td>1.025344</td>\n",
       "      <td>-0.142662</td>\n",
       "      <td>-0.096008</td>\n",
       "      <td>0.709638</td>\n",
       "      <td>-0.152292</td>\n",
       "      <td>0.086933</td>\n",
       "      <td>0.011396</td>\n",
       "      <td>0.176545</td>\n",
       "      <td>0.046775</td>\n",
       "      <td>0.007150</td>\n",
       "      <td>-0.024400</td>\n",
       "      <td>-0.002993</td>\n",
       "      <td>-0.006095</td>\n",
       "      <td>0.039697</td>\n",
       "      <td>1.746553</td>\n",
       "      <td>2.098073</td>\n",
       "      <td>-0.060061</td>\n",
       "      <td>1.619576</td>\n",
       "      <td>2.563606</td>\n",
       "      <td>11.115729</td>\n",
       "      <td>3.037973</td>\n",
       "      <td>-1.370317</td>\n",
       "      <td>-0.048690</td>\n",
       "      <td>0.616211</td>\n",
       "      <td>-0.048664</td>\n",
       "      <td>-0.057070</td>\n",
       "      <td>-0.006821</td>\n",
       "      <td>-0.025947</td>\n",
       "      <td>0.512331</td>\n",
       "      <td>-0.037030</td>\n",
       "      <td>-0.004062</td>\n",
       "      <td>-0.022019</td>\n",
       "      <td>0.010974</td>\n",
       "      <td>0.003922</td>\n",
       "      <td>3.888193</td>\n",
       "      <td>-0.103991</td>\n",
       "      <td>10.747376</td>\n",
       "      <td>-0.201868</td>\n",
       "      <td>-1.029359</td>\n",
       "      <td>-0.084598</td>\n",
       "      <td>-0.058490</td>\n",
       "      <td>0.003925</td>\n",
       "      <td>-0.189564</td>\n",
       "      <td>1.079759</td>\n",
       "      <td>0.517163</td>\n",
       "      <td>0.122800</td>\n",
       "      <td>0.164728</td>\n",
       "      <td>0.133404</td>\n",
       "      <td>0.410240</td>\n",
       "      <td>0.037670</td>\n",
       "      <td>-0.004558</td>\n",
       "      <td>-1.486575</td>\n",
       "      <td>-2.373965</td>\n",
       "      <td>-2.459573</td>\n",
       "      <td>0.558129</td>\n",
       "      <td>2.161080</td>\n",
       "      <td>0.274007</td>\n",
       "      <td>-1.494697</td>\n",
       "      <td>0.878058</td>\n",
       "      <td>0.552586</td>\n",
       "      <td>-0.386907</td>\n",
       "      <td>-0.155726</td>\n",
       "      <td>-1.659545</td>\n",
       "      <td>-0.318225</td>\n",
       "      <td>1.383669</td>\n",
       "      <td>-1.387218</td>\n",
       "      <td>-0.856718</td>\n",
       "      <td>0.895416</td>\n",
       "      <td>-0.505446</td>\n",
       "      <td>0.888153</td>\n",
       "      <td>-1.531110</td>\n",
       "      <td>-0.831957</td>\n",
       "      <td>-0.358353</td>\n",
       "      <td>-4.640062</td>\n",
       "      <td>1.411405</td>\n",
       "      <td>0.327792</td>\n",
       "      <td>-0.868070</td>\n",
       "      <td>-1.383799</td>\n",
       "      <td>2.445411</td>\n",
       "      <td>0.043965</td>\n",
       "      <td>0.412626</td>\n",
       "      <td>-1.160302</td>\n",
       "      <td>-0.433618</td>\n",
       "      <td>-0.217280</td>\n",
       "      <td>-1.730701</td>\n",
       "      <td>2.426631</td>\n",
       "      <td>1.139691</td>\n",
       "      <td>-0.668761</td>\n",
       "      <td>3.439743</td>\n",
       "      <td>-0.801678</td>\n",
       "      <td>2.302606</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 607 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               客户编号        注册资本  是否长期经营  经营成立时间是否相等  注册资金过小  \\\n",
       "0  158a8d99bec2a2b652a6de45a2b52ec9   690521.61       0           1       0   \n",
       "1  b1d244a25a82adb7beafe33fe971402c   345266.51       1           1       0   \n",
       "2  85b1ab1270516d2ebe21ed00c6abbf27   690521.61       0           1       0   \n",
       "3  ef194610bdbecdea9af3cc23bceba8b2  1312010.26       1           1       0   \n",
       "4  1220f9592fdd0b3fa9bbbd90e6d69d84     6904.22       1           1       0   \n",
       "\n",
       "   经营是否已过期  剩余经营天数_天  已经营天数_天  当期经营期限总天数_天  自成立经营期限总天数_天  已成立天数_天  再次经营_天  \\\n",
       "0        0      5655     1644         7299          7299     1644       0   \n",
       "1        0    364059     1748       365807        365807     1748       0   \n",
       "2        0      9818     1130        10948         10948     1130       0   \n",
       "3        0    364059     4140       368199        368199     4140       0   \n",
       "4        0    364059     1203       365262        365262     1203       0   \n",
       "\n",
       "   剩余经营天数_月  已经营天数_月  当期经营期限总天数_月  自成立经营期限总天数_月  已成立天数_月  再次经营_月    剩余经营天数_年  \\\n",
       "0       186       54          240           240       54       0   15.500000   \n",
       "1     11969       57        12026         12026       57       0  997.416667   \n",
       "2       323       37          360           360       37       0   26.916667   \n",
       "3     11969      136        12105         12105      136       0  997.416667   \n",
       "4     11969       39        12008         12008       39       0  997.416667   \n",
       "\n",
       "     已经营天数_年  当期经营期限总天数_年  自成立经营期限总天数_年    已成立天数_年  再次经营_年  经营状态_编码  \\\n",
       "0   4.500000    20.000000     20.000000   4.500000     0.0        1   \n",
       "1   4.750000  1002.166667   1002.166667   4.750000     0.0        1   \n",
       "2   3.083333    30.000000     30.000000   3.083333     0.0        1   \n",
       "3  11.333333  1008.750000   1008.750000  11.333333     0.0        1   \n",
       "4   3.250000  1000.666667   1000.666667   3.250000     0.0        1   \n",
       "\n",
       "   企业（机构）类型编码_频数是否前10  企业（机构）类型编码_频数是否后20  企业（机构）类型编码_是否频数最高2类  所在省份编码_频数是否前5  \\\n",
       "0                   1                   0                    1              0   \n",
       "1                   1                   0                    1              1   \n",
       "2                   1                   0                    1              0   \n",
       "3                   1                   0                    0              1   \n",
       "4                   1                   0                    1              0   \n",
       "\n",
       "   所在省份编码_频数是否后5  企业（机构）类型编码_是否坏率最高2类  国民经济行业代码_频数是否前5  国民经济行业代码_频数是否后230  \\\n",
       "0              0                    0                0                  0   \n",
       "1              0                    0                0                  0   \n",
       "2              0                    0                0                  0   \n",
       "3              0                    0                0                  0   \n",
       "4              0                    0                0                  0   \n",
       "\n",
       "   企业（机构）类型编码_分箱  所在省份编码_分箱  国民经济行业代码_分箱  法定代表人相关企业个数  法人涉足企业类型  法人涉足国民经济行业代码  \\\n",
       "0              1          2            4            1         1             1   \n",
       "1              3          3            3            1         1             1   \n",
       "2              3          2            3            1         1             1   \n",
       "3              0          0            1            1         1             1   \n",
       "4              3          5            3            1         1             1   \n",
       "\n",
       "   法人跨省个数  法定代表人_tfidf_0  法定代表人_tfidf_1  法定代表人_tfidf_2  法定代表人_tfidf_3  \\\n",
       "0       1   3.112701e-32  -3.182684e-31   1.459411e-30  -3.919896e-30   \n",
       "1       1  -1.410392e-32   1.273339e-31  -8.141020e-32   2.163897e-30   \n",
       "2       1   2.116352e-32  -1.864457e-31   6.685384e-31  -4.770473e-30   \n",
       "3       1  -1.676322e-32   3.206701e-31  -9.128244e-31   2.888886e-30   \n",
       "4       1   2.565327e-32  -3.368057e-31   5.346743e-31  -4.084920e-30   \n",
       "\n",
       "   法定代表人_tfidf_4  法定代表人_tfidf_5  法定代表人_tfidf_6  法定代表人_tfidf_7  法定代表人_tfidf_8  \\\n",
       "0  -1.098859e-30  -2.575129e-30   1.086710e-29  -1.080662e-29  -1.030773e-29   \n",
       "1   1.331409e-31  -6.148483e-31   1.653255e-30   2.443203e-30   7.139733e-31   \n",
       "2  -3.592343e-31   7.272122e-31   3.420462e-30  -5.808280e-30  -4.844511e-30   \n",
       "3  -4.781771e-31   2.151488e-30   2.749349e-30   4.982391e-30   7.240230e-30   \n",
       "4  -1.081447e-30   2.307360e-30   6.182635e-30  -7.799187e-30  -4.088520e-30   \n",
       "\n",
       "   法定代表人_tfidf_9  法定代表人_countvec_0  法定代表人_countvec_1  法定代表人_countvec_2  \\\n",
       "0  -6.907220e-30      3.112701e-32     -3.182684e-31      1.459411e-30   \n",
       "1   1.090951e-29     -1.410392e-32      1.273339e-31     -8.141020e-32   \n",
       "2  -1.512985e-31      2.116352e-32     -1.864457e-31      6.685384e-31   \n",
       "3   1.619470e-29     -1.676322e-32      3.206701e-31     -9.128244e-31   \n",
       "4  -1.147978e-29      2.565327e-32     -3.368057e-31      5.346743e-31   \n",
       "\n",
       "   法定代表人_countvec_3  法定代表人_countvec_4  法定代表人_countvec_5  法定代表人_countvec_6  \\\n",
       "0     -3.919896e-30     -1.098859e-30     -2.575129e-30      1.086710e-29   \n",
       "1      2.163897e-30      1.331409e-31     -6.148483e-31      1.653255e-30   \n",
       "2     -4.770473e-30     -3.592343e-31      7.272122e-31      3.420462e-30   \n",
       "3      2.888886e-30     -4.781771e-31      2.151488e-30      2.749349e-30   \n",
       "4     -4.084920e-30     -1.081447e-30      2.307360e-30      6.182635e-30   \n",
       "\n",
       "   法定代表人_countvec_7  法定代表人_countvec_8  法定代表人_countvec_9  企业（机构）类型编码_tfidf_0  \\\n",
       "0     -1.080662e-29     -1.030773e-29     -6.907220e-30        1.000000e+00   \n",
       "1      2.443203e-30      7.139733e-31      1.090951e-29        2.426068e-17   \n",
       "2     -5.808280e-30     -4.844511e-30     -1.512985e-31        2.426068e-17   \n",
       "3      4.982391e-30      7.240230e-30      1.619470e-29       -6.700930e-34   \n",
       "4     -7.799187e-30     -4.088520e-30     -1.147978e-29        2.426068e-17   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_1  企业（机构）类型编码_tfidf_2  企业（机构）类型编码_tfidf_3  \\\n",
       "0       -2.426068e-17       -1.053226e-21       -1.867483e-28   \n",
       "1        1.000000e+00        1.346388e-16        2.266380e-23   \n",
       "2        1.000000e+00        1.346388e-16        2.266380e-23   \n",
       "3        4.599464e-29       -1.806836e-27        2.730483e-21   \n",
       "4        1.000000e+00        1.346388e-16        2.266380e-23   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_4  企业（机构）类型编码_tfidf_5  企业（机构）类型编码_tfidf_6  \\\n",
       "0        9.566782e-33        1.061066e-34        6.700930e-34   \n",
       "1       -1.150903e-27       -1.622419e-29       -4.599464e-29   \n",
       "2       -1.150903e-27       -1.622419e-29       -4.599464e-29   \n",
       "3        5.325693e-16        8.374388e-16        1.000000e+00   \n",
       "4       -1.150903e-27       -1.622419e-29       -4.599464e-29   \n",
       "\n",
       "   企业（机构）类型编码_tfidf_7  企业（机构）类型编码_tfidf_8  企业（机构）类型编码_tfidf_9  \\\n",
       "0        1.710763e-32        9.444267e-36       -9.671025e-36   \n",
       "1       -1.176252e-27       -6.493528e-31        6.649440e-31   \n",
       "2       -1.176252e-27       -6.493528e-31        6.649440e-31   \n",
       "3       -5.751836e-15        1.363924e-16        8.418867e-17   \n",
       "4       -1.176252e-27       -6.493528e-31        6.649440e-31   \n",
       "\n",
       "   企业（机构）类型编码_countvec_0  企业（机构）类型编码_countvec_1  企业（机构）类型编码_countvec_2  \\\n",
       "0           1.000000e+00          -2.426068e-17          -1.053226e-21   \n",
       "1           2.426068e-17           1.000000e+00           1.346388e-16   \n",
       "2           2.426068e-17           1.000000e+00           1.346388e-16   \n",
       "3          -6.700930e-34           4.599464e-29          -1.806836e-27   \n",
       "4           2.426068e-17           1.000000e+00           1.346388e-16   \n",
       "\n",
       "   企业（机构）类型编码_countvec_3  企业（机构）类型编码_countvec_4  企业（机构）类型编码_countvec_5  \\\n",
       "0          -1.867483e-28           9.566782e-33           1.061066e-34   \n",
       "1           2.266380e-23          -1.150903e-27          -1.622419e-29   \n",
       "2           2.266380e-23          -1.150903e-27          -1.622419e-29   \n",
       "3           2.730483e-21           5.325693e-16           8.374388e-16   \n",
       "4           2.266380e-23          -1.150903e-27          -1.622419e-29   \n",
       "\n",
       "   企业（机构）类型编码_countvec_6  企业（机构）类型编码_countvec_7  企业（机构）类型编码_countvec_8  \\\n",
       "0           6.700930e-34           1.710763e-32           9.444267e-36   \n",
       "1          -4.599464e-29          -1.176252e-27          -6.493528e-31   \n",
       "2          -4.599464e-29          -1.176252e-27          -6.493528e-31   \n",
       "3           1.000000e+00          -5.751836e-15           1.363924e-16   \n",
       "4          -4.599464e-29          -1.176252e-27          -6.493528e-31   \n",
       "\n",
       "   企业（机构）类型编码_countvec_9  所在省份编码_tfidf_0  所在省份编码_tfidf_1  所在省份编码_tfidf_2  \\\n",
       "0          -9.671025e-36    8.890848e-20    5.167360e-20    3.042293e-21   \n",
       "1           6.649440e-31    1.191350e-19    8.202335e-20   -7.986753e-17   \n",
       "2           6.649440e-31    8.890848e-20    5.167360e-20    3.042293e-21   \n",
       "3           8.418867e-17   -3.234077e-12    1.000000e+00    8.793772e-17   \n",
       "4           6.649440e-31    2.769857e-19    1.637761e-19    4.382888e-17   \n",
       "\n",
       "   所在省份编码_tfidf_3  所在省份编码_tfidf_4  所在省份编码_tfidf_5  所在省份编码_tfidf_6  \\\n",
       "0    1.043209e-19    1.619241e-18    1.076051e-17    5.214132e-17   \n",
       "1    1.000000e+00    1.931267e-13    1.685272e-14    2.963131e-15   \n",
       "2    1.043209e-19    1.619241e-18    1.076051e-17    5.214132e-17   \n",
       "3   -8.445156e-20    4.161935e-20   -1.638265e-19    9.081989e-19   \n",
       "4   -1.687456e-14    5.085346e-13    1.000000e+00    2.153623e-13   \n",
       "\n",
       "   所在省份编码_tfidf_7  所在省份编码_tfidf_8  所在省份编码_tfidf_9  所在省份编码_countvec_0  \\\n",
       "0   -3.663833e-16   -2.029394e-15    1.134680e-13       8.890848e-20   \n",
       "1    9.738470e-20    2.085465e-17   -3.055759e-19       1.191350e-19   \n",
       "2   -3.663833e-16   -2.029394e-15    1.134680e-13       8.890848e-20   \n",
       "3   -1.163952e-17    4.433245e-18   -1.508798e-19      -3.234077e-12   \n",
       "4    1.701940e-15   -3.050288e-16   -5.825926e-18       2.769857e-19   \n",
       "\n",
       "   所在省份编码_countvec_1  所在省份编码_countvec_2  所在省份编码_countvec_3  所在省份编码_countvec_4  \\\n",
       "0       5.167360e-20       3.042293e-21       1.043209e-19       1.619241e-18   \n",
       "1       8.202335e-20      -7.986753e-17       1.000000e+00       1.931267e-13   \n",
       "2       5.167360e-20       3.042293e-21       1.043209e-19       1.619241e-18   \n",
       "3       1.000000e+00       8.793772e-17      -8.445156e-20       4.161935e-20   \n",
       "4       1.637761e-19       4.382888e-17      -1.687456e-14       5.085346e-13   \n",
       "\n",
       "   所在省份编码_countvec_5  所在省份编码_countvec_6  所在省份编码_countvec_7  所在省份编码_countvec_8  \\\n",
       "0       1.076051e-17       5.214132e-17      -3.663833e-16      -2.029394e-15   \n",
       "1       1.685272e-14       2.963131e-15       9.738470e-20       2.085465e-17   \n",
       "2       1.076051e-17       5.214132e-17      -3.663833e-16      -2.029394e-15   \n",
       "3      -1.638265e-19       9.081989e-19      -1.163952e-17       4.433245e-18   \n",
       "4       1.000000e+00       2.153623e-13       1.701940e-15      -3.050288e-16   \n",
       "\n",
       "   所在省份编码_countvec_9  ...  渠道代码_摘要信息_tfidf_8  渠道代码_摘要信息_tfidf_9  \\\n",
       "0       1.134680e-13  ...          -0.034319          -0.002089   \n",
       "1      -3.055759e-19  ...          -0.006221           0.001555   \n",
       "2       1.134680e-13  ...           0.346344          -0.020487   \n",
       "3      -1.508798e-19  ...          -0.005117          -0.021882   \n",
       "4      -5.825926e-18  ...          -0.008447          -0.023078   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_0  渠道代码_摘要信息_countvec_1  渠道代码_摘要信息_countvec_2  \\\n",
       "0              1.858848              8.185632             -2.171361   \n",
       "1              0.620219              2.537268             -0.674693   \n",
       "2              1.355179             16.244522             -3.977844   \n",
       "3             26.182480             17.705948             -4.749788   \n",
       "4              1.445150              5.906150             -1.568768   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_3  渠道代码_摘要信息_countvec_4  渠道代码_摘要信息_countvec_5  \\\n",
       "0            -16.286702              0.072092             -0.356895   \n",
       "1             -5.360068              0.023221             -0.122647   \n",
       "2              4.805528              0.158932             -0.000969   \n",
       "3            -37.287020             -1.234869             -0.770082   \n",
       "4            -12.372115              0.050952             -0.286854   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_6  渠道代码_摘要信息_countvec_7  渠道代码_摘要信息_countvec_8  \\\n",
       "0              0.006055              1.709881             -0.224256   \n",
       "1              0.001265              0.447589             -0.072313   \n",
       "2             -0.142467             -7.910744             -0.283739   \n",
       "3             -0.150895             -1.078964             -0.585895   \n",
       "4              0.012423              1.025344             -0.142662   \n",
       "\n",
       "   渠道代码_摘要信息_countvec_9  渠道代码_交易对手客户编号_tfidf_0  渠道代码_交易对手客户编号_tfidf_1  \\\n",
       "0             -0.140932               0.747345               0.084474   \n",
       "1             -0.045287               0.954345              -0.266194   \n",
       "2              1.281801               0.038409               0.071916   \n",
       "3              0.642842               0.326897               0.360825   \n",
       "4             -0.096008               0.709638              -0.152292   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_2  渠道代码_交易对手客户编号_tfidf_3  渠道代码_交易对手客户编号_tfidf_4  \\\n",
       "0              -0.118081              -0.087905              -0.029901   \n",
       "1              -0.054113               0.095159              -0.064070   \n",
       "2               0.027822               0.008290               0.023041   \n",
       "3               0.040931              -0.023629              -0.147423   \n",
       "4               0.086933               0.011396               0.176545   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_5  渠道代码_交易对手客户编号_tfidf_6  渠道代码_交易对手客户编号_tfidf_7  \\\n",
       "0              -0.052425               0.091839               0.220949   \n",
       "1              -0.016413              -0.031555              -0.021995   \n",
       "2               0.019194               0.000861              -0.004176   \n",
       "3               0.009431              -0.001184               0.027686   \n",
       "4               0.046775               0.007150              -0.024400   \n",
       "\n",
       "   渠道代码_交易对手客户编号_tfidf_8  渠道代码_交易对手客户编号_tfidf_9  渠道代码_交易对手客户编号_countvec_0  \\\n",
       "0              -0.014612              -0.045959                  0.056557   \n",
       "1              -0.002435               0.001869                  0.011952   \n",
       "2              -0.003173              -0.004939                  0.142981   \n",
       "3              -0.022390              -0.055930                  0.634773   \n",
       "4              -0.002993              -0.006095                  0.039697   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_1  渠道代码_交易对手客户编号_countvec_2  \\\n",
       "0                  1.864093                  6.557424   \n",
       "1                  0.253371                  0.960214   \n",
       "2                  6.949969                 10.172628   \n",
       "3                 21.849386                 46.912099   \n",
       "4                  1.746553                  2.098073   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_3  渠道代码_交易对手客户编号_countvec_4  \\\n",
       "0                 -0.155376                  1.547072   \n",
       "1                 -0.015594                  0.776399   \n",
       "2                 -0.354891                  0.547250   \n",
       "3                 -1.122035                  3.347970   \n",
       "4                 -0.060061                  1.619576   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_5  渠道代码_交易对手客户编号_countvec_6  \\\n",
       "0                  0.360551                 14.312553   \n",
       "1                  0.343235                  5.580273   \n",
       "2                  3.946043                  0.827492   \n",
       "3                 -0.331290                 30.115275   \n",
       "4                  2.563606                 11.115729   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_7  渠道代码_交易对手客户编号_countvec_8  \\\n",
       "0                  0.776877                 -2.122171   \n",
       "1                  0.536097                 -0.743166   \n",
       "2                  3.289976                 -0.392159   \n",
       "3                -11.977400                 -8.193690   \n",
       "4                  3.037973                 -1.370317   \n",
       "\n",
       "   渠道代码_交易对手客户编号_countvec_9  摘要信息_交易对手客户编号_tfidf_0  摘要信息_交易对手客户编号_tfidf_1  \\\n",
       "0                 -0.223114               0.419106               0.016600   \n",
       "1                  0.109909               0.927692              -0.086164   \n",
       "2                 -2.222795               0.023494               0.102962   \n",
       "3                 -7.806300               0.273279               0.036439   \n",
       "4                 -0.048690               0.616211              -0.048664   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_2  摘要信息_交易对手客户编号_tfidf_3  摘要信息_交易对手客户编号_tfidf_4  \\\n",
       "0              -0.052080              -0.023534              -0.013723   \n",
       "1              -0.094302              -0.032104              -0.012549   \n",
       "2               0.020178              -0.002688              -0.000106   \n",
       "3               0.008137               0.145022               0.088603   \n",
       "4              -0.057070              -0.006821              -0.025947   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_5  摘要信息_交易对手客户编号_tfidf_6  摘要信息_交易对手客户编号_tfidf_7  \\\n",
       "0              -0.015781              -0.011465               0.000263   \n",
       "1               0.026771               0.017189              -0.006457   \n",
       "2              -0.001943              -0.004486              -0.005001   \n",
       "3              -0.013096              -0.017238              -0.020591   \n",
       "4               0.512331              -0.037030              -0.004062   \n",
       "\n",
       "   摘要信息_交易对手客户编号_tfidf_8  摘要信息_交易对手客户编号_tfidf_9  摘要信息_交易对手客户编号_countvec_0  \\\n",
       "0               0.006329              -0.008315                  0.004686   \n",
       "1              -0.022007              -0.029549                  0.002460   \n",
       "2              -0.004576              -0.002313                  0.002035   \n",
       "3              -0.023771              -0.007787                  0.021611   \n",
       "4              -0.022019               0.010974                  0.003922   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_1  摘要信息_交易对手客户编号_countvec_2  \\\n",
       "0                  7.081610                 -0.203347   \n",
       "1                  1.642961                 -0.045107   \n",
       "2                 11.582282                 -0.467038   \n",
       "3                 20.482397                 -0.566625   \n",
       "4                  3.888193                 -0.103991   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_3  摘要信息_交易对手客户编号_countvec_4  \\\n",
       "0                 15.557270                 -0.294090   \n",
       "1                  4.847877                 -0.090691   \n",
       "2                 -1.174324                 -0.017412   \n",
       "3                 50.022002                 -0.820114   \n",
       "4                 10.747376                 -0.201868   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_5  摘要信息_交易对手客户编号_countvec_6  \\\n",
       "0                 -0.416516                 -0.169363   \n",
       "1                 -0.832567                 -0.031741   \n",
       "2                 -1.006417                 -0.015536   \n",
       "3                 -2.476982                 -0.255035   \n",
       "4                 -1.029359                 -0.084598   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_7  摘要信息_交易对手客户编号_countvec_8  \\\n",
       "0                 -0.083976                 -0.020281   \n",
       "1                 -0.027015                  0.012764   \n",
       "2                 -0.011788                  0.019163   \n",
       "3                 -0.253709                 -0.030940   \n",
       "4                 -0.058490                  0.003925   \n",
       "\n",
       "   摘要信息_交易对手客户编号_countvec_9  客户编号_交易代码_渠道代码_w2v_0  客户编号_交易代码_渠道代码_w2v_1  \\\n",
       "0                 -0.315465              1.216363              0.387643   \n",
       "1                 -0.088627              1.044738              0.462644   \n",
       "2                  0.003630              0.826676              0.204695   \n",
       "3                 -0.391722              1.085397              0.202935   \n",
       "4                 -0.189564              1.079759              0.517163   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_2  客户编号_交易代码_渠道代码_w2v_3  客户编号_交易代码_渠道代码_w2v_4  \\\n",
       "0              0.041235              0.122863              0.089642   \n",
       "1              0.349524              0.223502              0.210997   \n",
       "2             -0.542103             -0.000163              0.180474   \n",
       "3             -0.169672              0.129676              0.259515   \n",
       "4              0.122800              0.164728              0.133404   \n",
       "\n",
       "   客户编号_交易代码_渠道代码_w2v_5  客户编号_交易代码_渠道代码_w2v_6  客户编号_交易代码_渠道代码_w2v_7  \\\n",
       "0              0.360907              0.173583             -0.064026   \n",
       "1              0.544947              0.017120             -0.043680   \n",
       "2              0.180687             -0.122410              0.016429   \n",
       "3              0.462957              0.130256             -0.305817   \n",
       "4              0.410240              0.037670             -0.004558   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_0  客户编号_交易代码_摘要信息_w2v_1  客户编号_交易代码_摘要信息_w2v_2  \\\n",
       "0             -1.275527             -2.232501             -1.986748   \n",
       "1             -1.613089             -2.583142             -2.152398   \n",
       "2             -1.021788             -0.272600             -1.789511   \n",
       "3             -1.278671             -1.846682             -1.628390   \n",
       "4             -1.486575             -2.373965             -2.459573   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_3  客户编号_交易代码_摘要信息_w2v_4  客户编号_交易代码_摘要信息_w2v_5  \\\n",
       "0              0.746150              1.797314              0.352396   \n",
       "1              0.150843              1.850057             -0.022702   \n",
       "2              0.565752              2.424536             -0.423115   \n",
       "3              0.624487              1.518664              0.057439   \n",
       "4              0.558129              2.161080              0.274007   \n",
       "\n",
       "   客户编号_交易代码_摘要信息_w2v_6  客户编号_交易代码_摘要信息_w2v_7  客户编号_交易代码_交易对手客户编号_w2v_0  \\\n",
       "0             -1.015768              0.462867                  0.566154   \n",
       "1             -1.284593              0.659154                  0.069912   \n",
       "2             -0.470062             -0.397190                  1.329337   \n",
       "3             -0.598697              0.338764                  0.698881   \n",
       "4             -1.494697              0.878058                  0.552586   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_1  客户编号_交易代码_交易对手客户编号_w2v_2  \\\n",
       "0                 -0.368129                 -0.174086   \n",
       "1                 -0.546371                  0.007978   \n",
       "2                  0.479260                  0.200958   \n",
       "3                  0.031054                 -0.145838   \n",
       "4                 -0.386907                 -0.155726   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_3  客户编号_交易代码_交易对手客户编号_w2v_4  \\\n",
       "0                 -1.619980                 -0.334377   \n",
       "1                 -1.766838                 -0.242589   \n",
       "2                 -0.848595                 -0.169067   \n",
       "3                 -1.469987                 -0.148904   \n",
       "4                 -1.659545                 -0.318225   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_5  客户编号_交易代码_交易对手客户编号_w2v_6  \\\n",
       "0                  1.329901                 -1.552544   \n",
       "1                  1.300891                 -1.232311   \n",
       "2                  1.447362                 -1.504765   \n",
       "3                  1.424731                 -1.489185   \n",
       "4                  1.383669                 -1.387218   \n",
       "\n",
       "   客户编号_交易代码_交易对手客户编号_w2v_7  客户编号_渠道代码_摘要信息_w2v_0  客户编号_渠道代码_摘要信息_w2v_1  \\\n",
       "0                 -0.683254              0.606885             -0.580380   \n",
       "1                 -0.658328              0.748789             -0.674275   \n",
       "2                 -0.733271              0.182754             -1.634504   \n",
       "3                 -0.453307              0.231479             -0.645183   \n",
       "4                 -0.856718              0.895416             -0.505446   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_2  客户编号_渠道代码_摘要信息_w2v_3  客户编号_渠道代码_摘要信息_w2v_4  \\\n",
       "0              0.436700             -1.445817             -0.862692   \n",
       "1              0.760463             -1.553597             -1.171873   \n",
       "2             -0.224778             -1.601706              0.279196   \n",
       "3              0.041408             -0.725097             -0.169144   \n",
       "4              0.888153             -1.531110             -0.831957   \n",
       "\n",
       "   客户编号_渠道代码_摘要信息_w2v_5  客户编号_渠道代码_摘要信息_w2v_6  客户编号_渠道代码_摘要信息_w2v_7  \\\n",
       "0             -0.516107             -3.854006              1.488673   \n",
       "1             -0.458338             -4.781826              1.404563   \n",
       "2             -0.180027             -2.420538              1.218689   \n",
       "3             -0.441308             -2.946208              1.374965   \n",
       "4             -0.358353             -4.640062              1.411405   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_0  客户编号_渠道代码_交易对手客户编号_w2v_1  \\\n",
       "0                  0.283305                 -0.758342   \n",
       "1                  0.476772                 -0.980687   \n",
       "2                 -0.436527                 -0.123236   \n",
       "3                  0.065402                 -0.540489   \n",
       "4                  0.327792                 -0.868070   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_2  客户编号_渠道代码_交易对手客户编号_w2v_3  \\\n",
       "0                 -1.447038                  2.477799   \n",
       "1                 -1.494675                  2.663459   \n",
       "2                 -1.626440                  1.532142   \n",
       "3                 -1.105266                  2.261206   \n",
       "4                 -1.383799                  2.445411   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_4  客户编号_渠道代码_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.113421                  0.460692   \n",
       "1                 -0.016259                  0.320416   \n",
       "2                 -0.493065                  1.133597   \n",
       "3                  0.090320                  0.749933   \n",
       "4                  0.043965                  0.412626   \n",
       "\n",
       "   客户编号_渠道代码_交易对手客户编号_w2v_6  客户编号_渠道代码_交易对手客户编号_w2v_7  \\\n",
       "0                 -1.110022                 -0.269635   \n",
       "1                 -1.267092                 -0.348978   \n",
       "2                 -0.741304                 -0.691263   \n",
       "3                 -1.146302                 -0.647657   \n",
       "4                 -1.160302                 -0.433618   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_0  客户编号_摘要信息_交易对手客户编号_w2v_1  \\\n",
       "0                  0.127737                 -1.632896   \n",
       "1                 -0.024728                 -1.914073   \n",
       "2                  0.222341                 -1.410666   \n",
       "3                 -0.176236                 -1.444758   \n",
       "4                 -0.217280                 -1.730701   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_2  客户编号_摘要信息_交易对手客户编号_w2v_3  \\\n",
       "0                  2.295154                  0.965088   \n",
       "1                  2.964499                  1.383257   \n",
       "2                  1.065484                 -0.013875   \n",
       "3                  1.883517                  0.630231   \n",
       "4                  2.426631                  1.139691   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_4  客户编号_摘要信息_交易对手客户编号_w2v_5  \\\n",
       "0                 -0.534077                  2.403713   \n",
       "1                 -0.749559                  3.366637   \n",
       "2                  0.627372                  1.665259   \n",
       "3                 -0.187371                  1.957782   \n",
       "4                 -0.668761                  3.439743   \n",
       "\n",
       "   客户编号_摘要信息_交易对手客户编号_w2v_6  客户编号_摘要信息_交易对手客户编号_w2v_7  \n",
       "0                 -0.769188                  2.437117  \n",
       "1                 -0.936711                  2.617786  \n",
       "2                 -0.207545                  0.923987  \n",
       "3                 -0.172917                  1.954081  \n",
       "4                 -0.801678                  2.302606  \n",
       "\n",
       "[5 rows x 607 columns]"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "TARGET = TARGET.merge(feature_hyy, on = '客户编号', how = 'left')\n",
    "TARGET.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "59a2b9d1-be02-4005-9b69-c55936e4b4a1",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:19:10.198424Z",
     "iopub.status.busy": "2024-11-11T03:19:10.197631Z",
     "iopub.status.idle": "2024-11-11T03:19:10.207455Z",
     "msg_id": "8d2edf9d-c7e8-4f7c-9a94-83c5237d46aa",
     "shell.execute_reply": "2024-11-11T03:19:10.206596Z",
     "shell.execute_reply.started": "2024-11-11T03:19:10.198391Z"
    }
   },
   "outputs": [],
   "source": [
    "columns_basic = ['客户编号', '交易代码_摘要信息_tfidf_7', '客户编号_所在省份编码_w2v_1', '交易代码_摘要信息_tfidf_2', '交易代码_交易对手客户编号_tfidf_9', '客户编号_交易代码_交易对手客户编号_w2v_7', '交易代码_渠道代码_tfidf_8', '交易代码_交易对手客户编号_tfidf_6', '摘要信息_交易对手客户编号_tfidf_5', '客户编号_摘要信息_交易对手客户编号_w2v_0', '客户编号_摘要信息_w2v_5', '摘要信息_tfidf_8', '渠道代码_摘要信息_tfidf_7', '摘要信息_交易对手客户编号_countvec_0', '渠道代码_摘要信息_tfidf_1', '客户编号_企业机构类型_所在省份_w2v_4', '客户编号_所在省份编码_w2v_6', '渠道代码_tfidf_2', '注册资本', '已成立天数_月', '所在省份编码_tfidf_9', '客户编号_摘要信息_w2v_3', '所在省份编码_tfidf_0', '渠道代码_摘要信息_tfidf_9', '已经营天数_天', '已经营天数_月', '已成立天数_天', '企业（机构）类型编码_是否坏率最高2类', '摘要信息_交易对手客户编号_countvec_8', '客户编号_交易代码_摘要信息_w2v_1', '渠道代码_tfidf_6', '客户编号_摘要信息_w2v_0', '渠道代码_tfidf_9', '客户编号_交易代码_摘要信息_w2v_0', '客户编号_交易代码_w2v_1', '客户编号_所在省份编码_w2v_3', '交易代码_tfidf_8', '渠道代码_摘要信息_tfidf_8', '客户编号_所在省份编码_w2v_2', '交易代码_交易对手客户编号_tfidf_3', '客户编号_交易代码_摘要信息_w2v_7', '渠道代码_tfidf_5', '交易代码_摘要信息_countvec_4', '客户编号_渠道代码_摘要信息_w2v_5', '摘要信息_交易对手客户编号_tfidf_9', '所在省份_国民经济行业代码_tfidf_7', '渠道代码_摘要信息_countvec_6', '渠道代码_交易对手客户编号_tfidf_9', '渠道代码_交易对手客户编号_tfidf_3', '客户编号_企业机构类型_国民经济行业代码_w2v_6', '客户编号_法定代表人_w2v_2', '客户编号_摘要信息_交易对手客户编号_w2v_4', '摘要信息_countvec_5', '客户编号_法定代表人_w2v_5', '客户编号_交易代码_渠道代码_w2v_4', '客户编号_渠道代码_摘要信息_w2v_1', '客户编号_企业机构类型_所在省份_w2v_7', '客户编号_交易代码_w2v_4', '交易代码_渠道代码_tfidf_4', '渠道代码_交易对手客户编号_tfidf_4', '渠道代码_摘要信息_tfidf_5', '渠道代码_摘要信息_tfidf_2', '交易代码_渠道代码_tfidf_2', '客户编号_摘要信息_w2v_1', '摘要信息_交易对手客户编号_tfidf_4', '客户编号_企业机构类型_所在省份_w2v_0', '客户编号_企业机构类型_所在省份_w2v_2', '渠道代码_tfidf_0', '客户编号_渠道代码_摘要信息_w2v_7', '客户编号_企业机构类型_国民经济行业代码_w2v_3', '摘要信息_交易对手客户编号_countvec_5', '交易代码_countvec_6', '法定代表人_tfidf_9', '客户编号_企业机构类型_所在省份_w2v_6', '渠道代码_交易对手客户编号_tfidf_8', '交易代码_tfidf_2', '客户编号_企业机构类型_所在省份_w2v_5', '渠道代码_交易对手客户编号_tfidf_6', '企业机构类型_所在省份_tfidf_1', '客户编号_交易对手客户编号_w2v_5', '企业机构类型_国民经济行业代码_tfidf_7', '摘要信息_countvec_9', '客户编号_交易代码_交易对手客户编号_w2v_4', '客户编号_所在省份_国民经济行业代码_w2v_0', '客户编号_渠道代码_交易对手客户编号_w2v_4', '客户编号_渠道代码_w2v_3', '企业机构类型_所在省份_tfidf_3', '客户编号_法定代表人_w2v_3', '摘要信息_交易对手客户编号_countvec_3', '客户编号_法定代表人_w2v_7', 
'交易代码_摘要信息_countvec_9', '交易代码_渠道代码_countvec_6', '企业机构类型_所在省份_tfidf_5', '所在省份_国民经济行业代码_tfidf_8', '客户编号_交易代码_摘要信息_w2v_6', '渠道代码_摘要信息_countvec_5', '渠道代码_交易对手客户编号_tfidf_7', '客户编号_摘要信息_w2v_4', '客户编号_渠道代码_摘要信息_w2v_3', '法定代表人_tfidf_3', '客户编号_所在省份编码_w2v_0', '交易代码_摘要信息_tfidf_3', '交易代码_tfidf_5', '交易对手客户编号_tfidf_0', '国民经济行业代码_tfidf_9', '渠道代码_摘要信息_countvec_9']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "f951f82c-d844-4731-862e-4246605225c7",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:19:10.347666Z",
     "iopub.status.busy": "2024-11-11T03:19:10.347088Z",
     "iopub.status.idle": "2024-11-11T03:19:10.355051Z",
     "msg_id": "6fdc5c0d-a056-49c2-8d89-20ddf31a17f9",
     "shell.execute_reply": "2024-11-11T03:19:10.354357Z",
     "shell.execute_reply.started": "2024-11-11T03:19:10.347635Z"
    }
   },
   "outputs": [],
   "source": [
    "# Ordered list of 92 column names: the '客户编号' key first, then 91 feature columns.\n",
    "# The order is kept exactly as originally written; related names share a line only for readability.\n",
    "columns_tr = [\n",
    "    '客户编号',\n",
    "    '总交易金额占比_mean', '总交易金额占比_max', '总交易金额占比_min',\n",
    "    '转入金额占比_mean', '转入金额占比_max', '转入金额占比_min',\n",
    "    '转出金额占比_mean', '转出金额占比_max', '转出金额占比_min',\n",
    "    '本人金额占比_mean', '本人金额占比_max', '本人金额占比_min',\n",
    "    '非本人金额占比_mean', '非本人金额占比_max', '非本人金额占比_min',\n",
    "    '交易次数小于等于5',\n",
    "    '总流出金额', '总流出笔数', '流出平均金额', '流出金额方差', '流出金额最大值',\n",
    "    '总流入金额', '总流入笔数', '流入平均金额', '流入金额方差', '流入金额最大值',\n",
    "    '总净流', '总金额', '总笔数',\n",
    "    '近一月流出金额', '近一月流出笔数', '近一月流入金额', '近一月流入笔数',\n",
    "    '近一月总净流', '近一月总金额', '近一月总笔数',\n",
    "    '倒数第三月流出金额', '倒数第三月流出笔数', '倒数第三月流入金额', '倒数第三月流入笔数',\n",
    "    '倒数第三月总净流', '倒数第三月总金额', '倒数第三月总笔数',\n",
    "    '第三个月与第一个月流入金额差', '第三个月与第一个月流出金额差', '第三个月与第一个月总金额差',\n",
    "    '第三个月与第一个月流出笔数差', '第三个月与第一个月流入笔数差', '第三个月与第一个月总笔数差',\n",
    "    '相关客户数',\n",
    "    '最后交易日流出金额', '最后交易日流出笔数', '最后交易日流入金额', '最后交易日流入笔数',\n",
    "    '最后交易日总净流', '最后交易日总金额', '最后交易日总笔数',\n",
    "    '非工作日交易金额', '非工作日交易笔数',\n",
    "    '企业交易绝对值最高金额', '企业交易绝对值最低金额', '企业交易绝对值_mean', '企业交易绝对值_std',\n",
    "    '近一月平均账户余额', '近一月最大账户余额', '近一月账户余额方差',\n",
    "    '倒数第三个月平均账户余额', '倒数第三个月最大账户余额', '倒数第三个月账户余额方差',\n",
    "    '第三个月与第一个月余额均值差', '第三个月与第一个月余额最大差',\n",
    "    '近一月交易代码个数', '近一月渠道代码个数',\n",
    "    '倒数第三月交易代码个数', '倒数第三月渠道代码个数',\n",
    "    '第三个月与第一个月渠道数差', '第三个月与第一个月交易代码数差',\n",
    "    '交易代码_count_低频交易日', '渠道代码_count_低频交易日',\n",
    "    '合约账户余额_mean_低频交易日', '合约账户余额_min_低频交易日',\n",
    "    '合约账户余额_max_低频交易日', '合约账户余额_std_低频交易日',\n",
    "    '折人民币交易金额_count_低频交易日', '折人民币交易金额_sum_低频交易日',\n",
    "    '折人民币交易金额_max_低频交易日', '折人民币交易金额_std_低频交易日',\n",
    "    '折人民币交易金额_skew_低频交易日',\n",
    "    '折人民币交易金额_count_notzero_低频交易日', '折人民币交易金额_count_zero_低频交易日',\n",
    "    '交易对手客户编号_nunique_低频交易日',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "f8547ea3-4151-41f0-8814-555f91463a5f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:19:12.729370Z",
     "iopub.status.busy": "2024-11-11T03:19:12.728859Z",
     "iopub.status.idle": "2024-11-11T03:19:13.335118Z",
     "msg_id": "2576c6a4-962b-4fbe-842e-abd18bb16a2f",
     "shell.execute_reply": "2024-11-11T03:19:13.334338Z",
     "shell.execute_reply.started": "2024-11-11T03:19:12.729337Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 106)\n"
     ]
    }
   ],
   "source": [
    "# Take every row of TARGET, restricted to the columns named in columns_basic.\n",
    "# NOTE(review): TARGET is created elsewhere in the notebook; presumably a pandas DataFrame - confirm.\n",
    "feature_hyy_basic = TARGET.loc[:, columns_basic]\n",
    "# Print (rows, columns) of the selection before it is written out.\n",
    "print(feature_hyy_basic.shape)\n",
    "# Save the selection as a pickle file under ../data.\n",
    "feature_hyy_basic.to_pickle(\"../data/基本信息表_原本数据加文本特征_B榜.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "f7b1479b-1957-4a7c-ace8-4e232cc52da8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-11T03:19:13.336803Z",
     "iopub.status.busy": "2024-11-11T03:19:13.336449Z",
     "iopub.status.idle": "2024-11-11T03:19:13.449849Z",
     "msg_id": "f8b74ff0-ec24-4458-a0c1-9b6c81369313",
     "shell.execute_reply": "2024-11-11T03:19:13.449120Z",
     "shell.execute_reply.started": "2024-11-11T03:19:13.336773Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59116, 92)\n"
     ]
    }
   ],
   "source": [
    "# Take every row of TARGET, restricted to the columns named in columns_tr.\n",
    "# NOTE(review): TARGET is created elsewhere in the notebook; presumably a pandas DataFrame - confirm.\n",
    "feature_hyy_tr = TARGET.loc[:, columns_tr]\n",
    "# Print (rows, columns) of the selection before it is written out.\n",
    "print(feature_hyy_tr.shape)\n",
    "# Save the selection as a pickle file under ../data.\n",
    "feature_hyy_tr.to_pickle(\"../data/交易流水表_原本数据加文本特征_B榜.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "193f5b6d-bc7a-4bfd-88ab-9abb46e658b4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
